//
// Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS = No-Save: The register allocator assumes that these registers
//               can be used without saving upon entry to the method, &
//               that they do not need to be saved at call sites.
//
// SOC = Save-On-Call: The register allocator assumes that these registers
//                     can be used without saving upon entry to the method,
//                     but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS = Always-Save: The register allocator assumes that these registers
//                   must be saved before using them upon entry to the
//                   method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes
// (i.e. the x86 hardware register number).

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// ESP is never allocatable as a general register; it is only referenced
// explicitly by frame-manipulation and addressing code, hence NS/NS.
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
// FPR0 (the x87 top-of-stack) is never handed out by the allocator, so its
// two 32-bit halves carry no VMReg mapping.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
// Each x87 register is described as an L (low) / H (high) 32-bit pair so a
// double can occupy two adjacent slots; note the VMReg index is one less
// than the ADL name (FPRnL maps to as_FloatRegister(n-1)) per the 0/1
// stack-shift trick described above.
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs (low register listed first in each pair)
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
reg_class ebpd_reg( EBP,EDI );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
// Single-precision x87 registers: only the low half of each L/H pair.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

// Double-precision x87 registers: each double occupies an adjacent L/H pair.
reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

// Singleton classes pinning a value to the (virtual) top-of-stack register.
reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
// All double registers except the top-of-stack pair.
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
// Relocation formats for 32-bit immediates and displacements.
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

// Conventional HotSpot shorthand for emitting through the local assembler.
#define __ _masm.

// How to find the high register of a Long pair, given the low register
// (the pair layouts in the long_reg classes place the high half two
// encodings above the low half).
#define HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
#define HIGH_FROM_LOW_ENC(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Nothing to pre-compute for register masks on x86_32.
void reg_mask_init() {}

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Store {lo, hi} into the first 16-byte-aligned slot at or below 'adr'
// and return that aligned address.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool entry is a 16-byte-aligned 128-bit constant carved out of
// fp_signmask_pool by double_quadword (the extra [0] pair is alignment slack).
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before every call: an FLDCW when the
// method runs in 24-bit FP mode, plus a VZEROUPPER when the CPU supports it.
// The byte counts here must match what the call encodings actually emit.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size in bytes of the FFree_Float_Stack_All sequence; set when it is first
// emitted (the -1 sentinel means "not emitted yet").
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  // Leaf calls that touch no FP state skip the FFree_Float_Stack_All prefix.
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit one ModR/M (or SIB) byte assembled from the 2-bit mod, 3-bit reg and
// 3-bit r/m fields.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte formed by OR-ing a condition code into a base opcode.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}
// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
                    int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
                    int format) {
#ifdef ASSERT
  // Sanity-check embedded oop immediates (0 and the non-oop sentinel are OK).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store.
// Emits opcode + [ESP+disp] addressing (ESP base always needs a SIB byte);
// uses the short 8-bit displacement form when disp fits in a signed byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);                      // Displacement // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);                      // Displacement // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// General ModR/M (+ optional SIB, + optional displacement) memory-operand
// encoder. base == -1 flags an absolute address; index == 0x4 means "no
// index" in the SIB encoding.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) {     // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Register-to-register move (MOV r32, r/m32); emits nothing for a self-move.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize the three-way FP compare result in 'dst':
// -1 for less-than or unordered (NaN), 0 for equal, 1 for greater-than.
static void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);   // unordered -> keep -1
  __ jcc(Assembler::below, done);    // less-than -> keep -1
  __ setb(Assembler::notEqual, dst); // 0 if equal, 1 if greater
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog; mirrors the code produced by MachPrologNode::emit
// (via verified_entry), so the framesize bookkeeping below must track it.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;   // the PUSH EBP below accounts for another word
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  // verified_entry emits the full frame-setup sequence (bang, push, sub).
  __ verified_entry(C);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog; mirrors MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL rsp, poll_offset[thread] \n\t"
              "JA #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize (imm32 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize (imm8 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    MacroAssembler masm(&cbuf);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Only create the stub on the real emission pass, not during sizing.
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//============================================================================= 731 732 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack }; 733 static enum RC rc_class( OptoReg::Name reg ) { 734 735 if( !OptoReg::is_valid(reg) ) return rc_bad; 736 if (OptoReg::is_stack(reg)) return rc_stack; 737 738 VMReg r = OptoReg::as_VMReg(reg); 739 if (r->is_Register()) return rc_int; 740 if (r->is_FloatRegister()) { 741 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 742 return rc_float; 743 } 744 if (r->is_KRegister()) return rc_kreg; 745 assert(r->is_XMMRegister(), "must be"); 746 return rc_xmm; 747 } 748 749 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, 750 int opcode, const char *op_str, int size, outputStream* st ) { 751 if( cbuf ) { 752 emit_opcode (*cbuf, opcode ); 753 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); 754 #ifndef PRODUCT 755 } else if( !do_size ) { 756 if( size != 0 ) st->print("\n\t"); 757 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 758 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 759 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 760 } else { // FLD, FST, PUSH, POP 761 st->print("%s [ESP + #%d]",op_str,offset); 762 } 763 #endif 764 } 765 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 766 return size+3+offset_size; 767 } 768 769 // Helper for XMM registers. Extra opcode bits, limited syntax. 
// Spill/reload a float or double between an XMM register and a stack slot.
// A (reg_lo, reg_lo+1) pair denotes a double; otherwise a single float.
// Emits with cbuf set, formats with cbuf == nullptr; returns byte-size estimate.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Size accounting: with AVX-512 the displacement may be compressible to
  // one byte even when > 127; otherwise the usual disp8/disp32 rule applies.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register move: double if both (lo, lo+1) pairs are adjacent,
// otherwise a single-float move.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Move a 32-bit GPR into an XMM register (MOVD).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// Move an XMM register into a 32-bit GPR (MOVD).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer reg-reg copy: MOV r32, r/m32 (0x8B /r), two bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 register to a stack slot.  If the value is not already on
// top of the FP stack it is FLD'ed first and stored with a popping store.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // 'st_op' smuggles the ModRM reg/digit field through impl_helper's 'reg'
  // argument: _regEncode[EBX_num] == 3 selects FSTP (store & pop) and
  // _regEncode[EDX_num] == 2 selects FST (store, no pop).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
947 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo, 948 int src_hi, int dst_hi, uint ireg, outputStream* st); 949 950 void vec_spill_helper(CodeBuffer *cbuf, bool is_load, 951 int stack_offset, int reg, uint ireg, outputStream* st); 952 953 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, 954 int dst_offset, uint ireg, outputStream* st) { 955 if (cbuf) { 956 MacroAssembler _masm(cbuf); 957 switch (ireg) { 958 case Op_VecS: 959 __ pushl(Address(rsp, src_offset)); 960 __ popl (Address(rsp, dst_offset)); 961 break; 962 case Op_VecD: 963 __ pushl(Address(rsp, src_offset)); 964 __ popl (Address(rsp, dst_offset)); 965 __ pushl(Address(rsp, src_offset+4)); 966 __ popl (Address(rsp, dst_offset+4)); 967 break; 968 case Op_VecX: 969 __ movdqu(Address(rsp, -16), xmm0); 970 __ movdqu(xmm0, Address(rsp, src_offset)); 971 __ movdqu(Address(rsp, dst_offset), xmm0); 972 __ movdqu(xmm0, Address(rsp, -16)); 973 break; 974 case Op_VecY: 975 __ vmovdqu(Address(rsp, -32), xmm0); 976 __ vmovdqu(xmm0, Address(rsp, src_offset)); 977 __ vmovdqu(Address(rsp, dst_offset), xmm0); 978 __ vmovdqu(xmm0, Address(rsp, -32)); 979 break; 980 case Op_VecZ: 981 __ evmovdquq(Address(rsp, -64), xmm0, 2); 982 __ evmovdquq(xmm0, Address(rsp, src_offset), 2); 983 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); 984 __ evmovdquq(xmm0, Address(rsp, -64), 2); 985 break; 986 default: 987 ShouldNotReachHere(); 988 } 989 #ifndef PRODUCT 990 } else { 991 switch (ireg) { 992 case Op_VecS: 993 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 994 "popl [rsp + #%d]", 995 src_offset, dst_offset); 996 break; 997 case Op_VecD: 998 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 999 "popq [rsp + #%d]\n\t" 1000 "pushl [rsp + #%d]\n\t" 1001 "popq [rsp + #%d]", 1002 src_offset, dst_offset, src_offset+4, dst_offset+4); 1003 break; 1004 case Op_VecX: 1005 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1006 "movdqu xmm0, [rsp + #%d]\n\t" 1007 
"movdqu [rsp + #%d], xmm0\n\t" 1008 "movdqu xmm0, [rsp - #16]", 1009 src_offset, dst_offset); 1010 break; 1011 case Op_VecY: 1012 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1013 "vmovdqu xmm0, [rsp + #%d]\n\t" 1014 "vmovdqu [rsp + #%d], xmm0\n\t" 1015 "vmovdqu xmm0, [rsp - #32]", 1016 src_offset, dst_offset); 1017 break; 1018 case Op_VecZ: 1019 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1020 "vmovdqu xmm0, [rsp + #%d]\n\t" 1021 "vmovdqu [rsp + #%d], xmm0\n\t" 1022 "vmovdqu xmm0, [rsp - #64]", 1023 src_offset, dst_offset); 1024 break; 1025 default: 1026 ShouldNotReachHere(); 1027 } 1028 #endif 1029 } 1030 } 1031 1032 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1033 // Get registers to move 1034 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1035 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1036 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1037 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1038 1039 enum RC src_second_rc = rc_class(src_second); 1040 enum RC src_first_rc = rc_class(src_first); 1041 enum RC dst_second_rc = rc_class(dst_second); 1042 enum RC dst_first_rc = rc_class(dst_first); 1043 1044 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1045 1046 // Generate spill code! 
1047 int size = 0; 1048 1049 if( src_first == dst_first && src_second == dst_second ) 1050 return size; // Self copy, no move 1051 1052 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) { 1053 uint ireg = ideal_reg(); 1054 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1055 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1056 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1057 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1058 // mem -> mem 1059 int src_offset = ra_->reg2offset(src_first); 1060 int dst_offset = ra_->reg2offset(dst_first); 1061 vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); 1062 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1063 vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st); 1064 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1065 int stack_offset = ra_->reg2offset(dst_first); 1066 vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st); 1067 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1068 int stack_offset = ra_->reg2offset(src_first); 1069 vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st); 1070 } else { 1071 ShouldNotReachHere(); 1072 } 1073 return 0; 1074 } 1075 1076 // -------------------------------------- 1077 // Check for mem-mem move. push/pop to move. 
1078 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1079 if( src_second == dst_first ) { // overlapping stack copy ranges 1080 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1081 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1082 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1083 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1084 } 1085 // move low bits 1086 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1087 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1088 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1089 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1090 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1091 } 1092 return size; 1093 } 1094 1095 // -------------------------------------- 1096 // Check for integer reg-reg copy 1097 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1098 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1099 1100 // Check for integer store 1101 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1102 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1103 1104 // Check for integer load 1105 if( src_first_rc == rc_stack && dst_first_rc == rc_int ) 1106 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1107 1108 // Check for integer reg-xmm reg copy 1109 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1110 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1111 "no 64 bit integer-float reg moves" ); 1112 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1113 } 1114 // -------------------------------------- 1115 // Check for float reg-reg copy 1116 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1117 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1118 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1119 if( cbuf ) { 1120 1121 // Note the mucking with the register encode to compensate for the 0/1 1122 // indexing issue mentioned in a comment in the reg_def sections 1123 // for FPR registers many lines above here. 1124 1125 if( src_first != FPR1L_num ) { 1126 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1127 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1128 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1129 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1130 } else { 1131 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1132 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1133 } 1134 #ifndef PRODUCT 1135 } else if( !do_size ) { 1136 if( size != 0 ) st->print("\n\t"); 1137 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1138 else st->print( "FST %s", Matcher::regName[dst_first]); 1139 #endif 1140 } 1141 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1142 } 1143 1144 // Check for float store 1145 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1146 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1147 } 1148 1149 // Check for float load 1150 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1151 int offset = ra_->reg2offset(src_first); 1152 const char *op_str; 1153 int op; 1154 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1155 op_str = "FLD_D"; 1156 op = 0xDD; 1157 } else { // 32-bit load 1158 op_str = "FLD_S"; 1159 op = 0xD9; 1160 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1161 } 1162 if( cbuf ) { 1163 emit_opcode (*cbuf, op ); 1164 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1165 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1166 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1167 #ifndef PRODUCT 1168 } else if( !do_size ) { 1169 if( size != 0 ) st->print("\n\t"); 1170 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1171 #endif 1172 } 1173 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1174 return size + 3+offset_size+2; 1175 } 1176 1177 // Check for xmm reg-reg copy 1178 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1179 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1180 (src_first+1 == src_second && dst_first+1 == dst_second), 1181 "no non-adjacent float-moves" ); 1182 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1183 } 1184 1185 // Check for xmm reg-integer reg copy 1186 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1187 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1188 "no 64 bit float-integer reg moves" ); 1189 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1190 } 1191 1192 // Check for xmm store 1193 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1194 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st); 1195 } 1196 1197 // Check for float xmm load 1198 if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1199 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1200 } 1201 1202 // Copy from float reg to xmm reg 1203 if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) { 1204 
// copy to the top of stack from floating point reg 1205 // and use LEA to preserve flags 1206 if( cbuf ) { 1207 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1208 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1209 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1210 emit_d8(*cbuf,0xF8); 1211 #ifndef PRODUCT 1212 } else if( !do_size ) { 1213 if( size != 0 ) st->print("\n\t"); 1214 st->print("LEA ESP,[ESP-8]"); 1215 #endif 1216 } 1217 size += 4; 1218 1219 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1220 1221 // Copy from the temp memory to the xmm reg. 1222 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1223 1224 if( cbuf ) { 1225 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1226 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1227 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1228 emit_d8(*cbuf,0x08); 1229 #ifndef PRODUCT 1230 } else if( !do_size ) { 1231 if( size != 0 ) st->print("\n\t"); 1232 st->print("LEA ESP,[ESP+8]"); 1233 #endif 1234 } 1235 size += 4; 1236 return size; 1237 } 1238 1239 // AVX-512 opmask specific spilling. 
1240 if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) { 1241 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1242 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1243 int offset = ra_->reg2offset(src_first); 1244 if (cbuf != nullptr) { 1245 MacroAssembler _masm(cbuf); 1246 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); 1247 #ifndef PRODUCT 1248 } else { 1249 st->print("KMOV %s, [ESP + %d]", Matcher::regName[dst_first], offset); 1250 #endif 1251 } 1252 return 0; 1253 } 1254 1255 if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) { 1256 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1257 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1258 int offset = ra_->reg2offset(dst_first); 1259 if (cbuf != nullptr) { 1260 MacroAssembler _masm(cbuf); 1261 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first])); 1262 #ifndef PRODUCT 1263 } else { 1264 st->print("KMOV [ESP + %d], %s", offset, Matcher::regName[src_first]); 1265 #endif 1266 } 1267 return 0; 1268 } 1269 1270 if (src_first_rc == rc_kreg && dst_first_rc == rc_int) { 1271 Unimplemented(); 1272 return 0; 1273 } 1274 1275 if (src_first_rc == rc_int && dst_first_rc == rc_kreg) { 1276 Unimplemented(); 1277 return 0; 1278 } 1279 1280 if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) { 1281 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1282 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1283 if (cbuf != nullptr) { 1284 MacroAssembler _masm(cbuf); 1285 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first])); 1286 #ifndef PRODUCT 1287 } else { 1288 st->print("KMOV %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]); 1289 #endif 1290 } 1291 return 0; 1292 } 1293 1294 
assert( size > 0, "missed a case" ); 1295 1296 // -------------------------------------------------------------------- 1297 // Check for second bits still needing moving. 1298 if( src_second == dst_second ) 1299 return size; // Self copy; no move 1300 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1301 1302 // Check for second word int-int move 1303 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1304 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1305 1306 // Check for second word integer store 1307 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1308 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1309 1310 // Check for second word integer load 1311 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1312 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1313 1314 Unimplemented(); 1315 return 0; // Mute compiler 1316 } 1317 1318 #ifndef PRODUCT 1319 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1320 implementation( nullptr, ra_, false, st ); 1321 } 1322 #endif 1323 1324 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1325 implementation( &cbuf, ra_, false, nullptr ); 1326 } 1327 1328 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1329 return MachNode::size(ra_); 1330 } 1331 1332 1333 //============================================================================= 1334 #ifndef PRODUCT 1335 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1336 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1337 int reg = ra_->get_reg_first(this); 1338 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1339 } 1340 #endif 1341 1342 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1343 int offset = 
ra_->reg2offset(in_RegMask(0).find_first_elem()); 1344 int reg = ra_->get_encode(this); 1345 if( offset >= 128 ) { 1346 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1347 emit_rm(cbuf, 0x2, reg, 0x04); 1348 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1349 emit_d32(cbuf, offset); 1350 } 1351 else { 1352 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1353 emit_rm(cbuf, 0x1, reg, 0x04); 1354 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1355 emit_d8(cbuf, offset); 1356 } 1357 } 1358 1359 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1360 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1361 if( offset >= 128 ) { 1362 return 7; 1363 } 1364 else { 1365 return 4; 1366 } 1367 } 1368 1369 //============================================================================= 1370 #ifndef PRODUCT 1371 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1372 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1373 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1374 st->print_cr("\tNOP"); 1375 st->print_cr("\tNOP"); 1376 if( !OptoBreakpoint ) 1377 st->print_cr("\tNOP"); 1378 } 1379 #endif 1380 1381 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1382 MacroAssembler masm(&cbuf); 1383 masm.ic_check(CodeEntryAlignment); 1384 } 1385 1386 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1387 return MachNode::size(ra_); // too many variables; just compute it 1388 // the hard way 1389 } 1390 1391 1392 //============================================================================= 1393 1394 // Vector calling convention not supported. 1395 bool Matcher::supports_vector_calling_convention() { 1396 return false; 1397 } 1398 1399 OptoRegPair Matcher::vector_return_value(uint ideal_reg) { 1400 Unimplemented(); 1401 return OptoRegPair(0, 0); 1402 } 1403 1404 // Is this branch offset short enough that a short branch can be used? 
1405 // 1406 // NOTE: If the platform does not provide any short branch variants, then 1407 // this method should return false for offset 0. 1408 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1409 // The passed offset is relative to address of the branch. 1410 // On 86 a branch displacement is calculated relative to address 1411 // of a next instruction. 1412 offset -= br_size; 1413 1414 // the short version of jmpConUCF2 contains multiple branches, 1415 // making the reach slightly less 1416 if (rule == jmpConUCF2_rule) 1417 return (-126 <= offset && offset <= 125); 1418 return (-128 <= offset && offset <= 127); 1419 } 1420 1421 // Return whether or not this register is ever used as an argument. This 1422 // function is used on startup to build the trampoline stubs in generateOptoStub. 1423 // Registers not mentioned will be killed by the VM call in the trampoline, and 1424 // arguments in those registers not be available to the callee. 1425 bool Matcher::can_be_java_arg( int reg ) { 1426 if( reg == ECX_num || reg == EDX_num ) return true; 1427 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; 1428 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; 1429 return false; 1430 } 1431 1432 bool Matcher::is_spillable_arg( int reg ) { 1433 return can_be_java_arg(reg); 1434 } 1435 1436 uint Matcher::int_pressure_limit() 1437 { 1438 return (INTPRESSURE == -1) ? 6 : INTPRESSURE; 1439 } 1440 1441 uint Matcher::float_pressure_limit() 1442 { 1443 return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE; 1444 } 1445 1446 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { 1447 // Use hardware integer DIV instruction when 1448 // it is faster than a code which use multiply. 1449 // Only when constant divisor fits into 32 bit 1450 // (min_jint is excluded to get only correct 1451 // positive 32 bit values from negative). 
1452 return VM_Version::has_fast_idiv() && 1453 (divisor == (int)divisor && divisor != min_jint); 1454 } 1455 1456 // Register for DIVI projection of divmodI 1457 RegMask Matcher::divI_proj_mask() { 1458 return EAX_REG_mask(); 1459 } 1460 1461 // Register for MODI projection of divmodI 1462 RegMask Matcher::modI_proj_mask() { 1463 return EDX_REG_mask(); 1464 } 1465 1466 // Register for DIVL projection of divmodL 1467 RegMask Matcher::divL_proj_mask() { 1468 ShouldNotReachHere(); 1469 return RegMask(); 1470 } 1471 1472 // Register for MODL projection of divmodL 1473 RegMask Matcher::modL_proj_mask() { 1474 ShouldNotReachHere(); 1475 return RegMask(); 1476 } 1477 1478 const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1479 return NO_REG_mask(); 1480 } 1481 1482 // Returns true if the high 32 bits of the value is known to be zero. 1483 bool is_operand_hi32_zero(Node* n) { 1484 int opc = n->Opcode(); 1485 if (opc == Op_AndL) { 1486 Node* o2 = n->in(2); 1487 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1488 return true; 1489 } 1490 } 1491 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1492 return true; 1493 } 1494 return false; 1495 } 1496 1497 %} 1498 1499 //----------ENCODING BLOCK----------------------------------------------------- 1500 // This block specifies the encoding classes used by the compiler to output 1501 // byte streams. Encoding classes generate functions which are called by 1502 // Machine Instruction Nodes in order to generate the bit encoding of the 1503 // instruction. Operands specify their base encoding interface with the 1504 // interface keyword. There are currently supported four interfaces, 1505 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an 1506 // operand to generate a function which returns its register number when 1507 // queried. CONST_INTER causes an operand to generate a function which 1508 // returns the value of the constant when queried. 
MEMORY_INTER causes an 1509 // operand to generate four functions which return the Base Register, the 1510 // Index Register, the Scale Value, and the Offset Value of the operand when 1511 // queried. COND_INTER causes an operand to generate six functions which 1512 // return the encoding code (ie - encoding bits for the instruction) 1513 // associated with each basic boolean condition for a conditional instruction. 1514 // Instructions specify two basic values for encoding. They use the 1515 // ins_encode keyword to specify their encoding class (which must be one of 1516 // the class names specified in the encoding block), and they use the 1517 // opcode keyword to specify, in order, their primary, secondary, and 1518 // tertiary opcode. Only the opcode sections which a particular instruction 1519 // needs for encoding need to be specified. 1520 encode %{ 1521 // Build emit functions for each basic byte or larger field in the intel 1522 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1523 // code in the enc_class source block. Emit functions will live in the 1524 // main source block for now. 
In future, we can generalize this by 1525 // adding a syntax that specifies the sizes of fields in an order, 1526 // so that the adlc can build the emit functions automagically 1527 1528 // Emit primary opcode 1529 enc_class OpcP %{ 1530 emit_opcode(cbuf, $primary); 1531 %} 1532 1533 // Emit secondary opcode 1534 enc_class OpcS %{ 1535 emit_opcode(cbuf, $secondary); 1536 %} 1537 1538 // Emit opcode directly 1539 enc_class Opcode(immI d8) %{ 1540 emit_opcode(cbuf, $d8$$constant); 1541 %} 1542 1543 enc_class SizePrefix %{ 1544 emit_opcode(cbuf,0x66); 1545 %} 1546 1547 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 1548 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1549 %} 1550 1551 enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many) 1552 emit_opcode(cbuf,$opcode$$constant); 1553 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1554 %} 1555 1556 enc_class mov_r32_imm0( rRegI dst ) %{ 1557 emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32 1558 emit_d32 ( cbuf, 0x0 ); // imm32==0x0 1559 %} 1560 1561 enc_class cdq_enc %{ 1562 // Full implementation of Java idiv and irem; checks for 1563 // special case as described in JVM spec., p.243 & p.271. 
// cdq_enc continued: emits cmp rax,0x80000000 / jne normal_case / xor rdx,rdx /
// cmp-divisor / je done / cdq, exactly the byte sequence tabulated in its own
// comment block. The idiv itself is deliberately NOT emitted here -- the code
// comment at "1598" says it must be emitted by the user of this rule.
1564 // 1565 // normal case special case 1566 // 1567 // input : rax,: dividend min_int 1568 // reg: divisor -1 1569 // 1570 // output: rax,: quotient (= rax, idiv reg) min_int 1571 // rdx: remainder (= rax, irem reg) 0 1572 // 1573 // Code sequnce: 1574 // 1575 // 81 F8 00 00 00 80 cmp rax,80000000h 1576 // 0F 85 0B 00 00 00 jne normal_case 1577 // 33 D2 xor rdx,edx 1578 // 83 F9 FF cmp rcx,0FFh 1579 // 0F 84 03 00 00 00 je done 1580 // normal_case: 1581 // 99 cdq 1582 // F7 F9 idiv rax,ecx 1583 // done: 1584 // 1585 emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8); 1586 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); 1587 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h 1588 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85); 1589 emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00); 1590 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case 1591 emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx 1592 emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh 1593 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84); 1594 emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00); 1595 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done 1596 // normal_case: 1597 emit_opcode(cbuf,0x99); // cdq 1598 // idiv (note: must be emitted by the user of this rule) 1599 // normal: 1600 %} 1601 1602 // Dense encoding for older common ops 1603 enc_class Opc_plus(immI opcode, rRegI reg) %{ 1604 emit_opcode(cbuf, $opcode$$constant + $reg$$reg); 1605 %} 1606 1607 1608 // Opcde enc_class for 8/32 bit immediate instructions with sign-extension 1609 enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit 1610 // Check for 8-bit immediate, and set sign extend bit in opcode 1611 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1612 emit_opcode(cbuf, $primary | 0x02); 1613 } 1614 else { // If 32-bit immediate 1615 emit_opcode(cbuf, $primary); 1616 } 1617 %} 1618 1619 enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m 1620 // Emit primary opcode and
// OpcSErm continued, then Con8or32 (emits the immediate itself as 8 or 32 bits),
// then Long_OpcSErm_Lo/Hi: the same opcode|0x02 sign-extend selection applied to
// the low / high 32-bit half of a 64-bit immediate, with the ModRM reg field taken
// from $secondary (Lo) or $tertiary (Hi, continuing on the next source line).
// All four use the same fits-in-[-128,127] test to pick the imm8 form.
set sign-extend bit 1621 // Check for 8-bit immediate, and set sign extend bit in opcode 1622 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1623 emit_opcode(cbuf, $primary | 0x02); } 1624 else { // If 32-bit immediate 1625 emit_opcode(cbuf, $primary); 1626 } 1627 // Emit r/m byte with secondary opcode, after primary opcode. 1628 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1629 %} 1630 1631 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1632 // Check for 8-bit immediate, and set sign extend bit in opcode 1633 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1634 $$$emit8$imm$$constant; 1635 } 1636 else { // If 32-bit immediate 1637 // Output immediate 1638 $$$emit32$imm$$constant; 1639 } 1640 %} 1641 1642 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1643 // Emit primary opcode and set sign-extend bit 1644 // Check for 8-bit immediate, and set sign extend bit in opcode 1645 int con = (int)$imm$$constant; // Throw away top bits 1646 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1647 // Emit r/m byte with secondary opcode, after primary opcode. 1648 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1649 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1650 else emit_d32(cbuf,con); 1651 %} 1652 1653 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1654 // Emit primary opcode and set sign-extend bit 1655 // Check for 8-bit immediate, and set sign extend bit in opcode 1656 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1657 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1658 // Emit r/m byte with tertiary opcode, after primary opcode.
// Long_OpcSErm_Hi tail (ModRM reg field = $tertiary, r/m = high-half register via
// HIGH_FROM_LOW_ENC), then: OpcSReg/bswap_long_bytes (BSWAP each 32-bit half of a
// long, then XCHG the halves); RegOpc (ModRM with $secondary as the /digit);
// enc_cmov* (CMOV via 0x0F 0x4x cc, or the x87 FCMOV form built from 0xDA00+cc);
// enc_cmov_branch (synthesizes CMOV as an inverted-condition short Jcc around a MOV);
// enc_PartialSubtypeCheck (MacroAssembler slow-path klass subtype check; clears EDI
// on the miss path only when $primary is set -- see the if below).
1659 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg)); 1660 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1661 else emit_d32(cbuf,con); 1662 %} 1663 1664 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1665 emit_cc(cbuf, $secondary, $dst$$reg ); 1666 %} 1667 1668 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1669 int destlo = $dst$$reg; 1670 int desthi = HIGH_FROM_LOW_ENC(destlo); 1671 // bswap lo 1672 emit_opcode(cbuf, 0x0F); 1673 emit_cc(cbuf, 0xC8, destlo); 1674 // bswap hi 1675 emit_opcode(cbuf, 0x0F); 1676 emit_cc(cbuf, 0xC8, desthi); 1677 // xchg lo and hi 1678 emit_opcode(cbuf, 0x87); 1679 emit_rm(cbuf, 0x3, destlo, desthi); 1680 %} 1681 1682 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1683 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1684 %} 1685 1686 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1687 $$$emit8$primary; 1688 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1689 %} 1690 1691 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1692 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1693 emit_d8(cbuf, op >> 8 ); 1694 emit_d8(cbuf, op & 255); 1695 %} 1696 1697 // emulate a CMOV with a conditional branch around a MOV 1698 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1699 // Invert sense of branch from sense of CMOV 1700 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1701 emit_d8( cbuf, $brOffs$$constant ); 1702 %} 1703 1704 enc_class enc_PartialSubtypeCheck( ) %{ 1705 Register Redi = as_Register(EDI_enc); // result register 1706 Register Reax = as_Register(EAX_enc); // super class 1707 Register Recx = as_Register(ECX_enc); // killed 1708 Register Resi = as_Register(ESI_enc); // sub class 1709 Label miss; 1710 1711 MacroAssembler _masm(&cbuf); 1712 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1713 nullptr, &miss, 1714 /*set_cond_codes:*/ true); 1715 if ($primary) { 1716 __ xorptr(Redi, Redi); 1717 } 1718 __ bind(miss); 1719 %} 1720 1721 enc_class FFree_Float_Stack_All %{ //
// FFree_Float_Stack_All continued: on SSE2+ only verifies the x87 stack is empty
// (under VerifyFPU); otherwise empties the FPU stack before a C call. It records the
// emitted size in sizeof_FFree_Float_Stack_All on first use and asserts every later
// emission matches -- the size is presumably relied on elsewhere (TODO confirm at
// call sites outside this chunk). Verify_FPU_For_Leaf is a VerifyFPU-only check.
// Java_To_Runtime (start) emits a direct runtime CALL with IMM32 relocation followed
// by post_call_nop(), then (continuing on the next source line) fixes up x87 float/
// double return values for SSE2+ callers.
Free_Float_Stack_All 1722 MacroAssembler masm(&cbuf); 1723 int start = masm.offset(); 1724 if (UseSSE >= 2) { 1725 if (VerifyFPU) { 1726 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1727 } 1728 } else { 1729 // External c_calling_convention expects the FPU stack to be 'clean'. 1730 // Compiled code leaves it dirty. Do cleanup now. 1731 masm.empty_FPU_stack(); 1732 } 1733 if (sizeof_FFree_Float_Stack_All == -1) { 1734 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1735 } else { 1736 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1737 } 1738 %} 1739 1740 enc_class Verify_FPU_For_Leaf %{ 1741 if( VerifyFPU ) { 1742 MacroAssembler masm(&cbuf); 1743 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1744 } 1745 %} 1746 1747 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1748 // This is the instruction starting address for relocation info. 1749 MacroAssembler _masm(&cbuf); 1750 cbuf.set_insts_mark(); 1751 $$$emit8$primary; 1752 // CALL directly to the runtime 1753 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1754 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1755 __ post_call_nop(); 1756 1757 if (UseSSE >= 2) { 1758 MacroAssembler _masm(&cbuf); 1759 BasicType rt = tf()->return_type(); 1760 1761 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1762 // A C runtime call where the return value is unused. In SSE2+ 1763 // mode the result needs to be removed from the FPU stack. It's 1764 // likely that this function call could be removed by the 1765 // optimizer if the C function is a pure function.
// Java_To_Runtime tail: discard an unused x87 result (ffree), or bounce a float/
// double result through a stack temp (fstp_s/fstp_d then movflt/movdbl) into xmm0.
// pre_call_resets restores the standard FPU control word when the method runs in
// 24-bit precision mode, then emits vzeroupper (its own comment: avoid the AVX<->SSE
// transition penalty); size is asserted against pre_call_resets_size().
// post_call_FPU re-loads the 24-bit control word after the call.
// Java_Static_Call: unresolved (-> runtime fixup, RELOC_IMM32) vs resolved
// (opt_virtual/static relocation, RELOC_DISP32), then either a shared to-interpreter
// stub for statically bound methods or a per-call stub; bails out with
// "CodeCache is full" if stub emission fails.
1766 __ ffree(0); 1767 } else if (rt == T_FLOAT) { 1768 __ lea(rsp, Address(rsp, -4)); 1769 __ fstp_s(Address(rsp, 0)); 1770 __ movflt(xmm0, Address(rsp, 0)); 1771 __ lea(rsp, Address(rsp, 4)); 1772 } else if (rt == T_DOUBLE) { 1773 __ lea(rsp, Address(rsp, -8)); 1774 __ fstp_d(Address(rsp, 0)); 1775 __ movdbl(xmm0, Address(rsp, 0)); 1776 __ lea(rsp, Address(rsp, 8)); 1777 } 1778 } 1779 %} 1780 1781 enc_class pre_call_resets %{ 1782 // If method sets FPU control word restore it here 1783 debug_only(int off0 = cbuf.insts_size()); 1784 if (ra_->C->in_24_bit_fp_mode()) { 1785 MacroAssembler _masm(&cbuf); 1786 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 1787 } 1788 // Clear upper bits of YMM registers when current compiled code uses 1789 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1790 MacroAssembler _masm(&cbuf); 1791 __ vzeroupper(); 1792 debug_only(int off1 = cbuf.insts_size()); 1793 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1794 %} 1795 1796 enc_class post_call_FPU %{ 1797 // If method sets FPU control word do it here also 1798 if (Compile::current()->in_24_bit_fp_mode()) { 1799 MacroAssembler masm(&cbuf); 1800 masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 1801 } 1802 %} 1803 1804 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1805 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1806 // who we intended to call. 1807 MacroAssembler _masm(&cbuf); 1808 cbuf.set_insts_mark(); 1809 $$$emit8$primary; 1810 1811 if (!_method) { 1812 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1813 runtime_call_Relocation::spec(), 1814 RELOC_IMM32); 1815 __ post_call_nop(); 1816 } else { 1817 int method_index = resolved_method_index(cbuf); 1818 RelocationHolder rspec = _optimized_virtual ?
// Java_Static_Call tail, then Java_Dynamic_Call (inline-cache call via ic_call) and
// Java_Compiled_Call: an indirect CALL through [EAX + Method::from_compiled_offset()]
// with an 8-bit displacement (asserted to fit). All three end with post_call_nop().
opt_virtual_call_Relocation::spec(method_index) 1819 : static_call_Relocation::spec(method_index); 1820 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1821 rspec, RELOC_DISP32); 1822 __ post_call_nop(); 1823 address mark = cbuf.insts_mark(); 1824 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) { 1825 // Calls of the same statically bound method can share 1826 // a stub to the interpreter. 1827 cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off()); 1828 } else { 1829 // Emit stubs for static call. 1830 address stub = CompiledDirectCall::emit_to_interp_stub(cbuf, mark); 1831 if (stub == nullptr) { 1832 ciEnv::current()->record_failure("CodeCache is full"); 1833 return; 1834 } 1835 } 1836 } 1837 %} 1838 1839 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1840 MacroAssembler _masm(&cbuf); 1841 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1842 __ post_call_nop(); 1843 %} 1844 1845 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1846 int disp = in_bytes(Method::from_compiled_offset()); 1847 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1848 1849 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1850 MacroAssembler _masm(&cbuf); 1851 cbuf.set_insts_mark(); 1852 $$$emit8$primary; 1853 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1854 emit_d8(cbuf, disp); // Displacement 1855 __ post_call_nop(); 1856 %} 1857 1858 // Following encoding is no longer used, but may be restored if calling 1859 // convention changes significantly.
// Dead code kept for reference: the fully commented-out Java_Interpreter_Call
// encoding (its replacement is named in its own comment). Then live encoders:
// RegOpcImm (shift-by-imm8: opcode, ModRM /digit, count byte); LdImmI/LdImmP
// (0xB8+rd or $primary+rd followed by imm32 -- their comments note MOV imm has no
// sign-extended imm8 form); LdImmL_Lo (start): loads the low 32 bits of a long
// immediate, using XOR dst,dst when that half is zero.
1860 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1861 // 1862 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1863 // // int ic_reg = Matcher::inline_cache_reg(); 1864 // // int ic_encode = Matcher::_regEncode[ic_reg]; 1865 // // int imo_reg = Matcher::interpreter_method_reg(); 1866 // // int imo_encode = Matcher::_regEncode[imo_reg]; 1867 // 1868 // // // Interpreter expects method_ptr in EBX, currently a callee-saved register, 1869 // // // so we load it immediately before the call 1870 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_ptr 1871 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1872 // 1873 // // xor rbp,ebp 1874 // emit_opcode(cbuf, 0x33); 1875 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1876 // 1877 // // CALL to interpreter. 1878 // cbuf.set_insts_mark(); 1879 // $$$emit8$primary; 1880 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1881 // runtime_call_Relocation::spec(), RELOC_IMM32 ); 1882 // %} 1883 1884 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1885 $$$emit8$primary; 1886 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1887 $$$emit8$shift$$constant; 1888 %} 1889 1890 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1891 // Load immediate does not have a zero or sign extended version 1892 // for 8-bit immediates 1893 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1894 $$$emit32$src$$constant; 1895 %} 1896 1897 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1898 // Load immediate does not have a zero or sign extended version 1899 // for 8-bit immediates 1900 emit_opcode(cbuf, $primary + $dst$$reg); 1901 $$$emit32$src$$constant; 1902 %} 1903 1904 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1905 // Load immediate does not have a zero or sign extended version 1906 // for 8-bit immediates 1907 int dst_enc = $dst$$reg; 1908 int src_con = $src$$constant & 0x0FFFFFFFFL; 1909 if (src_con == 0) { 1910 // xor dst, dst 1911
// LdImmL_Lo tail and LdImmL_Hi (same zero-half XOR optimization for the high 32
// bits; note dst_enc = $dst$$reg + 2 here rather than HIGH_FROM_LOW_ENC -- TODO
// confirm the +2 pairing convention against the register block). enc_Copy /
// enc_CopyL_Lo delegate to encode_Copy, which per the comment emits nothing for a
// useless same-register copy. RegReg_Lo/Hi/Lo2/Hi2/HiLo emit ModRM bytes for the
// low/high halves of long register pairs (Lo/Hi also emit $primary/$secondary
// first). Con32 emits a raw imm32.
emit_opcode(cbuf, 0x33); 1912 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1913 } else { 1914 emit_opcode(cbuf, $primary + dst_enc); 1915 emit_d32(cbuf, src_con); 1916 } 1917 %} 1918 1919 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 1920 // Load immediate does not have a zero or sign extended version 1921 // for 8-bit immediates 1922 int dst_enc = $dst$$reg + 2; 1923 int src_con = ((julong)($src$$constant)) >> 32; 1924 if (src_con == 0) { 1925 // xor dst, dst 1926 emit_opcode(cbuf, 0x33); 1927 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1928 } else { 1929 emit_opcode(cbuf, $primary + dst_enc); 1930 emit_d32(cbuf, src_con); 1931 } 1932 %} 1933 1934 1935 // Encode a reg-reg copy. If it is useless, then empty encoding. 1936 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 1937 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 1938 %} 1939 1940 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 1941 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 1942 %} 1943 1944 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 1945 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1946 %} 1947 1948 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 1949 $$$emit8$primary; 1950 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1951 %} 1952 1953 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 1954 $$$emit8$secondary; 1955 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg)); 1956 %} 1957 1958 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 1959 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1960 %} 1961 1962 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 1963 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg)); 1964 %} 1965 1966 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 1967 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg)); 1968 %} 1969 1970 enc_class Con32 (immI src) %{ // Con32(storeImmI) 1971 // Output immediate 1972 $$$emit32$src$$constant; 1973 %} 1974 1975 enc_class
// Con32FPR_as_bits / Con32F_as_bits reinterpret a float immediate as its raw jint
// bit pattern (jint_cast) and emit it as d32 -- used for storing float constants as
// integer stores. Con16/Con_d32 emit raw immediates; conmemref emits a ModRM with
// mod=00, r/m=101 (disp32 absolute form) plus a zero displacement; lock_prefix emits
// the 0xF0 LOCK byte. The comment block at the end explains the rbx/rcx swap needed
// around CMPXCHG8B (continuing on the next source line).
Con32FPR_as_bits(immFPR src) %{ // storeF_imm 1976 // Output Float immediate bits 1977 jfloat jf = $src$$constant; 1978 int jf_as_bits = jint_cast( jf ); 1979 emit_d32(cbuf, jf_as_bits); 1980 %} 1981 1982 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 1983 // Output Float immediate bits 1984 jfloat jf = $src$$constant; 1985 int jf_as_bits = jint_cast( jf ); 1986 emit_d32(cbuf, jf_as_bits); 1987 %} 1988 1989 enc_class Con16 (immI src) %{ // Con16(storeImmI) 1990 // Output immediate 1991 $$$emit16$src$$constant; 1992 %} 1993 1994 enc_class Con_d32(immI src) %{ 1995 emit_d32(cbuf,$src$$constant); 1996 %} 1997 1998 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 1999 // Output immediate memory reference 2000 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 2001 emit_d32(cbuf, 0x00); 2002 %} 2003 2004 enc_class lock_prefix( ) %{ 2005 emit_opcode(cbuf,0xF0); // [Lock] 2006 %} 2007 2008 // Cmp-xchg long value. 2009 // Note: we need to swap rbx, and rcx before and after the 2010 // cmpxchg8 instruction because the instruction uses 2011 // rcx as the high order word of the new value to store but 2012 // our register encoding uses rbx,. 
// The cmpxchg family: enc_cmpxchg8 brackets LOCK CMPXCHG8B [ESI] with XCHG rbx,rcx
// (per the comment above); enc_cmpxchg / enc_cmpxchgb / enc_cmpxchgw emit
// LOCK CMPXCHG in 32-, 8-, and 16-bit (0x66-prefixed) forms against [mem_ptr].
// enc_flags_ne_to_boolean materializes ZF as 0/1 using MOV res,0 / JNE,s +5 /
// MOV res,1 (MOV r32,imm32 is 5 bytes, hence the 5-byte skip).
// set_instruction_start marks the opcode start for memory-operand reloc info.
// RegMem (start) unpacks a memory operand's base/index/scale/disp for
// encode_RegMem.
2013 enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{ 2014 2015 // XCHG rbx,ecx 2016 emit_opcode(cbuf,0x87); 2017 emit_opcode(cbuf,0xD9); 2018 // [Lock] 2019 emit_opcode(cbuf,0xF0); 2020 // CMPXCHG8 [Eptr] 2021 emit_opcode(cbuf,0x0F); 2022 emit_opcode(cbuf,0xC7); 2023 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2024 // XCHG rbx,ecx 2025 emit_opcode(cbuf,0x87); 2026 emit_opcode(cbuf,0xD9); 2027 %} 2028 2029 enc_class enc_cmpxchg(eSIRegP mem_ptr) %{ 2030 // [Lock] 2031 emit_opcode(cbuf,0xF0); 2032 2033 // CMPXCHG [Eptr] 2034 emit_opcode(cbuf,0x0F); 2035 emit_opcode(cbuf,0xB1); 2036 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2037 %} 2038 2039 enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{ 2040 // [Lock] 2041 emit_opcode(cbuf,0xF0); 2042 2043 // CMPXCHGB [Eptr] 2044 emit_opcode(cbuf,0x0F); 2045 emit_opcode(cbuf,0xB0); 2046 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2047 %} 2048 2049 enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{ 2050 // [Lock] 2051 emit_opcode(cbuf,0xF0); 2052 2053 // 16-bit mode 2054 emit_opcode(cbuf, 0x66); 2055 2056 // CMPXCHGW [Eptr] 2057 emit_opcode(cbuf,0x0F); 2058 emit_opcode(cbuf,0xB1); 2059 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2060 %} 2061 2062 enc_class enc_flags_ne_to_boolean( iRegI res ) %{ 2063 int res_encoding = $res$$reg; 2064 2065 // MOV res,0 2066 emit_opcode( cbuf, 0xB8 + res_encoding); 2067 emit_d32( cbuf, 0 ); 2068 // JNE,s fail 2069 emit_opcode(cbuf,0x75); 2070 emit_d8(cbuf, 5 ); 2071 // MOV res,1 2072 emit_opcode( cbuf, 0xB8 + res_encoding); 2073 emit_d32( cbuf, 1 ); 2074 // fail: 2075 %} 2076 2077 enc_class set_instruction_start( ) %{ 2078 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2079 %} 2080 2081 enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem 2082 int reg_encoding = $ereg$$reg; 2083 int base = $mem$$base; 2084 int index = $mem$$index; 2085 int scale = $mem$$scale; 2086 int displace = $mem$$disp; 2087 relocInfo::relocType disp_reloc = $mem->disp_reloc(); 2088 encode_RegMem(cbuf, reg_encoding, base,
// RegMem tail, then RegMem_Hi: same emission but for the high half of a long
// register pair with disp+4, asserting the operand has no displacement relocation
// ("Cannot add 4 to oop"). move_long_small_shift handles 1..31-bit long shifts with
// a SHLD/SHRD ($tertiary selects which, 0xA4 = SHLD per the r1/r2 swap) followed by
// a plain shift of the other half. move_long_big_shift_sign handles arithmetic
// right shifts of 32..63: mov lo,hi; optional SAR by cnt-32; then SAR hi,31 to
// replicate the sign. move_long_big_shift_clr (start) is the logical counterpart.
index, scale, displace, disp_reloc); 2089 %} 2090 2091 enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem 2092 int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg); // Hi register of pair, computed from lo 2093 int base = $mem$$base; 2094 int index = $mem$$index; 2095 int scale = $mem$$scale; 2096 int displace = $mem$$disp + 4; // Offset is 4 further in memory 2097 assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" ); 2098 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none); 2099 %} 2100 2101 enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{ 2102 int r1, r2; 2103 if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW_ENC($dst$$reg); } 2104 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW_ENC($dst$$reg); } 2105 emit_opcode(cbuf,0x0F); 2106 emit_opcode(cbuf,$tertiary); 2107 emit_rm(cbuf, 0x3, r1, r2); 2108 emit_d8(cbuf,$cnt$$constant); 2109 emit_d8(cbuf,$primary); 2110 emit_rm(cbuf, 0x3, $secondary, r1); 2111 emit_d8(cbuf,$cnt$$constant); 2112 %} 2113 2114 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ 2115 emit_opcode( cbuf, 0x8B ); // Move 2116 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg)); 2117 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2118 emit_d8(cbuf,$primary); 2119 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 2120 emit_d8(cbuf,$cnt$$constant-32); 2121 } 2122 emit_d8(cbuf,$primary); 2123 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg)); 2124 emit_d8(cbuf,31); 2125 %} 2126 2127 enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{ 2128 int r1, r2; 2129 if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW_ENC($dst$$reg); } 2130 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW_ENC($dst$$reg); } 2131 2132 emit_opcode( cbuf, 0x8B ); // Move r1,r2 2133 emit_rm(cbuf, 0x3, r1, r2); 2134 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2135 emit_opcode(cbuf,$primary); 2136 emit_rm(cbuf, 0x3, $secondary, r1); 2137
// move_long_big_shift_clr tail: shift the surviving half by cnt-32, then XOR the
// vacated half to zero. Mov_MemD_half_to_Reg is, per its comment, a RegMem clone
// taking an extra disp_for_half so each 32-bit half of a double in memory can be
// addressed; it never carries relocation info.
emit_d8(cbuf,$cnt$$constant-32); 2138 } 2139 emit_opcode(cbuf,0x33); // XOR r2,r2 2140 emit_rm(cbuf, 0x3, r2, r2); 2141 %} 2142 2143 // Clone of RegMem but accepts an extra parameter to access each 2144 // half of a double in memory; it never needs relocation info. 2145 enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{ 2146 emit_opcode(cbuf,$opcode$$constant); 2147 int reg_encoding = $rm_reg$$reg; 2148 int base = $mem$$base; 2149 int index = $mem$$index; 2150 int scale = $mem$$scale; 2151 int displace = $mem$$disp + $disp_for_half$$constant; 2152 relocInfo::relocType disp_reloc = relocInfo::none; 2153 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2154 %} 2155 2156 // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!! 2157 // 2158 // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant 2159 // and it never needs relocation information. 2160 // Frequently used to move data between FPU's Stack Top and memory.
// RMopc_Mem_no_oop / RMopc_Mem: memory-operand emission where the ModRM reg/opcode
// field is an ADLC-time constant ($rm_opcode); the no_oop variant asserts there is
// no displacement relocation, the plain variant propagates it (disp-as-oop).
// RegLea emits a base+disp32 effective address (index=0x04 meaning none, per its
// own comments). min_enc/max_enc synthesize MIN/MAX as CMP; JL/JG short +2 around a
// 2-byte MOV dst,src.
2161 enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{ 2162 int rm_byte_opcode = $rm_opcode$$constant; 2163 int base = $mem$$base; 2164 int index = $mem$$index; 2165 int scale = $mem$$scale; 2166 int displace = $mem$$disp; 2167 assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" ); 2168 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none); 2169 %} 2170 2171 enc_class RMopc_Mem (immI rm_opcode, memory mem) %{ 2172 int rm_byte_opcode = $rm_opcode$$constant; 2173 int base = $mem$$base; 2174 int index = $mem$$index; 2175 int scale = $mem$$scale; 2176 int displace = $mem$$disp; 2177 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2178 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 2179 %} 2180 2181 enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea 2182 int reg_encoding = $dst$$reg; 2183 int base = $src0$$reg; // 0xFFFFFFFF indicates no base 2184 int index = 0x04; // 0x04 indicates no index 2185 int scale = 0x00; // 0x00 indicates no scale 2186 int displace = $src1$$constant; // 0x00 indicates no displacement 2187 relocInfo::relocType disp_reloc = relocInfo::none; 2188 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2189 %} 2190 2191 enc_class min_enc (rRegI dst, rRegI src) %{ // MIN 2192 // Compare dst,src 2193 emit_opcode(cbuf,0x3B); 2194 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2195 // jmp dst < src around move 2196 emit_opcode(cbuf,0x7C); 2197 emit_d8(cbuf,2); 2198 // move dst,src 2199 emit_opcode(cbuf,0x8B); 2200 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2201 %} 2202 2203 enc_class max_enc (rRegI dst, rRegI src) %{ // MAX 2204 // Compare dst,src 2205 emit_opcode(cbuf,0x3B); 2206 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2207 // jmp dst > src around move 2208 emit_opcode(cbuf,0x7F); 2209 emit_d8(cbuf,2); 2210 // move dst,src 2211 emit_opcode(cbuf,0x8B);
// max_enc tail, then enc_FPR_store: stores an x87 register to memory -- plain FST
// (reg field 0x2) if the source already is FPR1, else FLD it to the top first and
// FSTP (0x3), per its own comments; marks the insts position for reloc in the mem
// operand. neg_reg emits NEG (F7 /3); setLT_reg emits SETL (0F 9C). enc_cmpLTP is
// the branch-free conditional-add: SUB p,q / SBB tmp,tmp (tmp = 0 or -1) /
// AND tmp,y / ADD p,tmp. shift_left_long (start): variable 64-bit shift; TEST CL,32
// then a short JE over the hi=lo / lo=0 fixup (continues on the next source line).
2212 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2213 %} 2214 2215 enc_class enc_FPR_store(memory mem, regDPR src) %{ 2216 // If src is FPR1, we can just FST to store it. 2217 // Else we need to FLD it to FPR1, then FSTP to store/pop it. 2218 int reg_encoding = 0x2; // Just store 2219 int base = $mem$$base; 2220 int index = $mem$$index; 2221 int scale = $mem$$scale; 2222 int displace = $mem$$disp; 2223 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2224 if( $src$$reg != FPR1L_enc ) { 2225 reg_encoding = 0x3; // Store & pop 2226 emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it) 2227 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2228 } 2229 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2230 emit_opcode(cbuf,$primary); 2231 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2232 %} 2233 2234 enc_class neg_reg(rRegI dst) %{ 2235 // NEG $dst 2236 emit_opcode(cbuf,0xF7); 2237 emit_rm(cbuf, 0x3, 0x03, $dst$$reg ); 2238 %} 2239 2240 enc_class setLT_reg(eCXRegI dst) %{ 2241 // SETLT $dst 2242 emit_opcode(cbuf,0x0F); 2243 emit_opcode(cbuf,0x9C); 2244 emit_rm( cbuf, 0x3, 0x4, $dst$$reg ); 2245 %} 2246 2247 enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT 2248 int tmpReg = $tmp$$reg; 2249 2250 // SUB $p,$q 2251 emit_opcode(cbuf,0x2B); 2252 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg); 2253 // SBB $tmp,$tmp 2254 emit_opcode(cbuf,0x1B); 2255 emit_rm(cbuf, 0x3, tmpReg, tmpReg); 2256 // AND $tmp,$y 2257 emit_opcode(cbuf,0x23); 2258 emit_rm(cbuf, 0x3, tmpReg, $y$$reg); 2259 // ADD $p,$tmp 2260 emit_opcode(cbuf,0x03); 2261 emit_rm(cbuf, 0x3, $p$$reg, tmpReg); 2262 %} 2263 2264 enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{ 2265 // TEST shift,32 2266 emit_opcode(cbuf,0xF7); 2267 emit_rm(cbuf, 0x3, 0, ECX_enc); 2268 emit_d32(cbuf,0x20); 2269 // JEQ,s small 2270 emit_opcode(cbuf, 0x74); 2271 emit_d8(cbuf, 0x04); 2272 // MOV $dst.hi,$dst.lo 2273
// shift_left_long tail (hi=lo, lo=0 when bit 5 of CL is set, then SHLD hi,lo,CL +
// SHL lo,CL), shift_right_long (mirror image with SHRD/SHR and hi cleared), and
// shift_right_arith_long (lo=hi then SAR hi,31 for sign fill -- note its JE skips
// 5 bytes, not 4, to clear the 3-byte SAR imm8). The x87 hardware shifts mask the
// count to 5 bits, which is why only the 32-test fixup is needed.
emit_opcode( cbuf, 0x8B ); 2274 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg ); 2275 // CLR $dst.lo 2276 emit_opcode(cbuf, 0x33); 2277 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg); 2278 // small: 2279 // SHLD $dst.hi,$dst.lo,$shift 2280 emit_opcode(cbuf,0x0F); 2281 emit_opcode(cbuf,0xA5); 2282 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg)); 2283 // SHL $dst.lo,$shift" 2284 emit_opcode(cbuf,0xD3); 2285 emit_rm(cbuf, 0x3, 0x4, $dst$$reg ); 2286 %} 2287 2288 enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{ 2289 // TEST shift,32 2290 emit_opcode(cbuf,0xF7); 2291 emit_rm(cbuf, 0x3, 0, ECX_enc); 2292 emit_d32(cbuf,0x20); 2293 // JEQ,s small 2294 emit_opcode(cbuf, 0x74); 2295 emit_d8(cbuf, 0x04); 2296 // MOV $dst.lo,$dst.hi 2297 emit_opcode( cbuf, 0x8B ); 2298 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2299 // CLR $dst.hi 2300 emit_opcode(cbuf, 0x33); 2301 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg)); 2302 // small: 2303 // SHRD $dst.lo,$dst.hi,$shift 2304 emit_opcode(cbuf,0x0F); 2305 emit_opcode(cbuf,0xAD); 2306 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg); 2307 // SHR $dst.hi,$shift" 2308 emit_opcode(cbuf,0xD3); 2309 emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) ); 2310 %} 2311 2312 enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ 2313 // TEST shift,32 2314 emit_opcode(cbuf,0xF7); 2315 emit_rm(cbuf, 0x3, 0, ECX_enc); 2316 emit_d32(cbuf,0x20); 2317 // JEQ,s small 2318 emit_opcode(cbuf, 0x74); 2319 emit_d8(cbuf, 0x05); 2320 // MOV $dst.lo,$dst.hi 2321 emit_opcode( cbuf, 0x8B ); 2322 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2323 // SAR $dst.hi,31 2324 emit_opcode(cbuf, 0xC1); 2325 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) ); 2326 emit_d8(cbuf, 0x1F ); 2327 // small: 2328 // SHRD $dst.lo,$dst.hi,$shift 2329 emit_opcode(cbuf,0x0F); 2330 emit_opcode(cbuf,0xAD); 2331 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
// shift_right_arith_long tail, then the x87 floating-point section begins:
// OpcReg_FPR (primary opcode + ModRM /digit on an FPU stack register); PopFPU
// (FSTP ST(0) to discard the top of stack); Pop_Reg_DPR/Push_Reg_DPR (FSTP ST(i) /
// FLD ST(i-1)); strictfp_bias1/2 (FLD an 80-bit bias constant from StubRoutines and
// FMULP into dst -- the strictfp subnormal-scaling trick); OpcPRegSS delegates
// integer reg -> stack-slot stores to store_to_stackslot.
2332 // SAR $dst.hi,$shift" 2333 emit_opcode(cbuf,0xD3); 2334 emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) ); 2335 %} 2336 2337 2338 // ----------------- Encodings for floating point unit ----------------- 2339 // May leave result in FPU-TOS or FPU reg depending on opcodes 2340 enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV 2341 $$$emit8$primary; 2342 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); 2343 %} 2344 2345 // Pop argument in FPR0 with FSTP ST(0) 2346 enc_class PopFPU() %{ 2347 emit_opcode( cbuf, 0xDD ); 2348 emit_d8( cbuf, 0xD8 ); 2349 %} 2350 2351 // !!!!! equivalent to Pop_Reg_F 2352 enc_class Pop_Reg_DPR( regDPR dst ) %{ 2353 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2354 emit_d8( cbuf, 0xD8+$dst$$reg ); 2355 %} 2356 2357 enc_class Push_Reg_DPR( regDPR dst ) %{ 2358 emit_opcode( cbuf, 0xD9 ); 2359 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 2360 %} 2361 2362 enc_class strictfp_bias1( regDPR dst ) %{ 2363 emit_opcode( cbuf, 0xDB ); // FLD m80real 2364 emit_opcode( cbuf, 0x2D ); 2365 emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() ); 2366 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2367 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2368 %} 2369 2370 enc_class strictfp_bias2( regDPR dst ) %{ 2371 emit_opcode( cbuf, 0xDB ); // FLD m80real 2372 emit_opcode( cbuf, 0x2D ); 2373 emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() ); 2374 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2375 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2376 %} 2377 2378 // Special case for moving an integer register to a stack slot. 2379 enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2380 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp ); 2381 %} 2382 2383 // Special case for moving a register to a stack slot.
// RegSS emits the ESP-based addressing tail (ModRM mod=10 + SIB + disp32) for a
// register -> stack-slot move whose opcode was already emitted. Push_Mem_I /
// Pop_Mem_FPR / Pop_Mem_DPR use store_to_stackslot for FILD / FSTP_S / FSTP_D
// against [ESP+disp]. Pop_Reg_FPR/Push_Reg_FPR mirror the DPR pair above.
// Pop_Mem_Reg_FPR/DPR store an arbitrary x87 register to a stack slot: FST (0x02)
// if the source is already FPR1, else FLD first and FSTP (0x03).
2384 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2385 // Opcode already emitted 2386 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2387 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2388 emit_d32(cbuf, $dst$$disp); // Displacement 2389 %} 2390 2391 // Push the integer in stackSlot 'src' onto FP-stack 2392 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2393 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2394 %} 2395 2396 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2397 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2398 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2399 %} 2400 2401 // Same as Pop_Mem_F except for opcode 2402 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2403 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2404 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2405 %} 2406 2407 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2408 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2409 emit_d8( cbuf, 0xD8+$dst$$reg ); 2410 %} 2411 2412 enc_class Push_Reg_FPR( regFPR dst ) %{ 2413 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2414 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2415 %} 2416 2417 // Push FPU's float to a stack-slot, and pop FPU-stack 2418 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2419 int pop = 0x02; 2420 if ($src$$reg != FPR1L_enc) { 2421 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2422 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2423 pop = 0x03; 2424 } 2425 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2426 %} 2427 2428 // Push FPU's double to a stack-slot, and pop FPU-stack 2429 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2430 int pop = 0x02; 2431 if ($src$$reg != FPR1L_enc) { 2432 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2433 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2434 pop = 0x03; 2435 } 2436 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2437 %} 2438 2439 // Push FPU's double to
// Pop_Reg_Reg_DPR: register-to-register form of the same FST/FSTP selection
// (second byte 0xD0+i = FST ST(i), 0xD8+i = FSTP ST(i)). Push_Reg_Mod_DPR loads dst
// to the top, then uses the fincstp / FXCH / fdecstp trick to swap src into FPR1
// without disturbing the new top-of-stack. The SSE<->x87 bridges follow:
// Push_ModD/ModF_encoding spill one or two xmm values through a stack temp and FLD
// them (for x87-only ops like fprem); Push_ResultD/F FSTP the x87 result back to
// the temp and reload it into xmm; Push_SrcD (start) pushes a single xmm double.
a FPU-stack-slot, and pop FPU-stack 2440 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 2441 int pop = 0xD0 - 1; // -1 since we skip FLD 2442 if ($src$$reg != FPR1L_enc) { 2443 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 2444 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2445 pop = 0xD8; 2446 } 2447 emit_opcode( cbuf, 0xDD ); 2448 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) 2449 %} 2450 2451 2452 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 2453 // load dst in FPR0 2454 emit_opcode( cbuf, 0xD9 ); 2455 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2456 if ($src$$reg != FPR1L_enc) { 2457 // fincstp 2458 emit_opcode (cbuf, 0xD9); 2459 emit_opcode (cbuf, 0xF7); 2460 // swap src with FPR1: 2461 // FXCH FPR1 with src 2462 emit_opcode(cbuf, 0xD9); 2463 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2464 // fdecstp 2465 emit_opcode (cbuf, 0xD9); 2466 emit_opcode (cbuf, 0xF6); 2467 } 2468 %} 2469 2470 enc_class Push_ModD_encoding(regD src0, regD src1) %{ 2471 MacroAssembler _masm(&cbuf); 2472 __ subptr(rsp, 8); 2473 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2474 __ fld_d(Address(rsp, 0)); 2475 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2476 __ fld_d(Address(rsp, 0)); 2477 %} 2478 2479 enc_class Push_ModF_encoding(regF src0, regF src1) %{ 2480 MacroAssembler _masm(&cbuf); 2481 __ subptr(rsp, 4); 2482 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2483 __ fld_s(Address(rsp, 0)); 2484 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2485 __ fld_s(Address(rsp, 0)); 2486 %} 2487 2488 enc_class Push_ResultD(regD dst) %{ 2489 MacroAssembler _masm(&cbuf); 2490 __ fstp_d(Address(rsp, 0)); 2491 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2492 __ addptr(rsp, 8); 2493 %} 2494 2495 enc_class Push_ResultF(regF dst, immI d8) %{ 2496 MacroAssembler _masm(&cbuf); 2497 __ fstp_s(Address(rsp, 0)); 2498 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2499 __ addptr(rsp, $d8$$constant); 2500 %} 2501 2502 enc_class Push_SrcD(regD src) %{ 2503 MacroAssembler _masm(&cbuf); 2504 __ subptr(rsp, 8);
2505 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2506 __ fld_d(Address(rsp, 0)); 2507 %} 2508 2509 enc_class push_stack_temp_qword() %{ 2510 MacroAssembler _masm(&cbuf); 2511 __ subptr(rsp, 8); 2512 %} 2513 2514 enc_class pop_stack_temp_qword() %{ 2515 MacroAssembler _masm(&cbuf); 2516 __ addptr(rsp, 8); 2517 %} 2518 2519 enc_class push_xmm_to_fpr1(regD src) %{ 2520 MacroAssembler _masm(&cbuf); 2521 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2522 __ fld_d(Address(rsp, 0)); 2523 %} 2524 2525 enc_class Push_Result_Mod_DPR( regDPR src) %{ 2526 if ($src$$reg != FPR1L_enc) { 2527 // fincstp 2528 emit_opcode (cbuf, 0xD9); 2529 emit_opcode (cbuf, 0xF7); 2530 // FXCH FPR1 with src 2531 emit_opcode(cbuf, 0xD9); 2532 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2533 // fdecstp 2534 emit_opcode (cbuf, 0xD9); 2535 emit_opcode (cbuf, 0xF6); 2536 } 2537 // // following asm replaced with Pop_Reg_F or Pop_Mem_F 2538 // // FSTP FPR$dst$$reg 2539 // emit_opcode( cbuf, 0xDD ); 2540 // emit_d8( cbuf, 0xD8+$dst$$reg ); 2541 %} 2542 2543 enc_class fnstsw_sahf_skip_parity() %{ 2544 // fnstsw ax 2545 emit_opcode( cbuf, 0xDF ); 2546 emit_opcode( cbuf, 0xE0 ); 2547 // sahf 2548 emit_opcode( cbuf, 0x9E ); 2549 // jnp ::skip 2550 emit_opcode( cbuf, 0x7B ); 2551 emit_opcode( cbuf, 0x05 ); 2552 %} 2553 2554 enc_class emitModDPR() %{ 2555 // fprem must be iterative 2556 // :: loop 2557 // fprem 2558 emit_opcode( cbuf, 0xD9 ); 2559 emit_opcode( cbuf, 0xF8 ); 2560 // wait 2561 emit_opcode( cbuf, 0x9b ); 2562 // fnstsw ax 2563 emit_opcode( cbuf, 0xDF ); 2564 emit_opcode( cbuf, 0xE0 ); 2565 // sahf 2566 emit_opcode( cbuf, 0x9E ); 2567 // jp ::loop 2568 emit_opcode( cbuf, 0x0F ); 2569 emit_opcode( cbuf, 0x8A ); 2570 emit_opcode( cbuf, 0xF4 ); 2571 emit_opcode( cbuf, 0xFF ); 2572 emit_opcode( cbuf, 0xFF ); 2573 emit_opcode( cbuf, 0xFF ); 2574 %} 2575 2576 enc_class fpu_flags() %{ 2577 // fnstsw_ax 2578 emit_opcode( cbuf, 0xDF); 2579 emit_opcode( cbuf, 0xE0); 2580 // test ax,0x0400 2581 emit_opcode( cbuf, 
0x66 ); // operand-size prefix for 16-bit immediate 2582 emit_opcode( cbuf, 0xA9 ); 2583 emit_d16 ( cbuf, 0x0400 ); 2584 // // // This sequence works, but stalls for 12-16 cycles on PPro 2585 // // test rax,0x0400 2586 // emit_opcode( cbuf, 0xA9 ); 2587 // emit_d32 ( cbuf, 0x00000400 ); 2588 // 2589 // jz exit (no unordered comparison) 2590 emit_opcode( cbuf, 0x74 ); 2591 emit_d8 ( cbuf, 0x02 ); 2592 // mov ah,1 - treat as LT case (set carry flag) 2593 emit_opcode( cbuf, 0xB4 ); 2594 emit_d8 ( cbuf, 0x01 ); 2595 // sahf 2596 emit_opcode( cbuf, 0x9E); 2597 %} 2598 2599 enc_class cmpF_P6_fixup() %{ 2600 // Fixup the integer flags in case comparison involved a NaN 2601 // 2602 // JNP exit (no unordered comparison, P-flag is set by NaN) 2603 emit_opcode( cbuf, 0x7B ); 2604 emit_d8 ( cbuf, 0x03 ); 2605 // MOV AH,1 - treat as LT case (set carry flag) 2606 emit_opcode( cbuf, 0xB4 ); 2607 emit_d8 ( cbuf, 0x01 ); 2608 // SAHF 2609 emit_opcode( cbuf, 0x9E); 2610 // NOP // target for branch to avoid branch to branch 2611 emit_opcode( cbuf, 0x90); 2612 %} 2613 2614 // fnstsw_ax(); 2615 // sahf(); 2616 // movl(dst, nan_result); 2617 // jcc(Assembler::parity, exit); 2618 // movl(dst, less_result); 2619 // jcc(Assembler::below, exit); 2620 // movl(dst, equal_result); 2621 // jcc(Assembler::equal, exit); 2622 // movl(dst, greater_result); 2623 2624 // less_result = 1; 2625 // greater_result = -1; 2626 // equal_result = 0; 2627 // nan_result = -1; 2628 2629 enc_class CmpF_Result(rRegI dst) %{ 2630 // fnstsw_ax(); 2631 emit_opcode( cbuf, 0xDF); 2632 emit_opcode( cbuf, 0xE0); 2633 // sahf 2634 emit_opcode( cbuf, 0x9E); 2635 // movl(dst, nan_result); 2636 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2637 emit_d32( cbuf, -1 ); 2638 // jcc(Assembler::parity, exit); 2639 emit_opcode( cbuf, 0x7A ); 2640 emit_d8 ( cbuf, 0x13 ); 2641 // movl(dst, less_result); 2642 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2643 emit_d32( cbuf, -1 ); 2644 // jcc(Assembler::below, exit); 2645 emit_opcode( cbuf, 0x72 ); 
2646 emit_d8 ( cbuf, 0x0C ); 2647 // movl(dst, equal_result); 2648 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2649 emit_d32( cbuf, 0 ); 2650 // jcc(Assembler::equal, exit); 2651 emit_opcode( cbuf, 0x74 ); 2652 emit_d8 ( cbuf, 0x05 ); 2653 // movl(dst, greater_result); 2654 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2655 emit_d32( cbuf, 1 ); 2656 %} 2657 2658 2659 // Compare the longs and set flags 2660 // BROKEN! Do Not use as-is 2661 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2662 // CMP $src1.hi,$src2.hi 2663 emit_opcode( cbuf, 0x3B ); 2664 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) ); 2665 // JNE,s done 2666 emit_opcode(cbuf,0x75); 2667 emit_d8(cbuf, 2 ); 2668 // CMP $src1.lo,$src2.lo 2669 emit_opcode( cbuf, 0x3B ); 2670 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2671 // done: 2672 %} 2673 2674 enc_class convert_int_long( regL dst, rRegI src ) %{ 2675 // mov $dst.lo,$src 2676 int dst_encoding = $dst$$reg; 2677 int src_encoding = $src$$reg; 2678 encode_Copy( cbuf, dst_encoding , src_encoding ); 2679 // mov $dst.hi,$src 2680 encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding ); 2681 // sar $dst.hi,31 2682 emit_opcode( cbuf, 0xC1 ); 2683 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) ); 2684 emit_d8(cbuf, 0x1F ); 2685 %} 2686 2687 enc_class convert_long_double( eRegL src ) %{ 2688 // push $src.hi 2689 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg)); 2690 // push $src.lo 2691 emit_opcode(cbuf, 0x50+$src$$reg ); 2692 // fild 64-bits at [SP] 2693 emit_opcode(cbuf,0xdf); 2694 emit_d8(cbuf, 0x6C); 2695 emit_d8(cbuf, 0x24); 2696 emit_d8(cbuf, 0x00); 2697 // pop stack 2698 emit_opcode(cbuf, 0x83); // add SP, #8 2699 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2700 emit_d8(cbuf, 0x8); 2701 %} 2702 2703 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2704 // IMUL EDX:EAX,$src1 2705 emit_opcode( cbuf, 0xF7 ); 2706 emit_rm( cbuf, 0x3, 0x5, 
$src1$$reg ); 2707 // SAR EDX,$cnt-32 2708 int shift_count = ((int)$cnt$$constant) - 32; 2709 if (shift_count > 0) { 2710 emit_opcode(cbuf, 0xC1); 2711 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2712 emit_d8(cbuf, shift_count); 2713 } 2714 %} 2715 2716 // this version doesn't have add sp, 8 2717 enc_class convert_long_double2( eRegL src ) %{ 2718 // push $src.hi 2719 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg)); 2720 // push $src.lo 2721 emit_opcode(cbuf, 0x50+$src$$reg ); 2722 // fild 64-bits at [SP] 2723 emit_opcode(cbuf,0xdf); 2724 emit_d8(cbuf, 0x6C); 2725 emit_d8(cbuf, 0x24); 2726 emit_d8(cbuf, 0x00); 2727 %} 2728 2729 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2730 // Basic idea: long = (long)int * (long)int 2731 // IMUL EDX:EAX, src 2732 emit_opcode( cbuf, 0xF7 ); 2733 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2734 %} 2735 2736 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2737 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2738 // MUL EDX:EAX, src 2739 emit_opcode( cbuf, 0xF7 ); 2740 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2741 %} 2742 2743 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2744 // Basic idea: lo(result) = lo(x_lo * y_lo) 2745 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2746 // MOV $tmp,$src.lo 2747 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2748 // IMUL $tmp,EDX 2749 emit_opcode( cbuf, 0x0F ); 2750 emit_opcode( cbuf, 0xAF ); 2751 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2752 // MOV EDX,$src.hi 2753 encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) ); 2754 // IMUL EDX,EAX 2755 emit_opcode( cbuf, 0x0F ); 2756 emit_opcode( cbuf, 0xAF ); 2757 emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg ); 2758 // ADD $tmp,EDX 2759 emit_opcode( cbuf, 0x03 ); 2760 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2761 // MUL EDX:EAX,$src.lo 2762 emit_opcode( cbuf, 0xF7 ); 2763 emit_rm( cbuf, 0x3, 0x4, 
$src$$reg ); 2764 // ADD EDX,ESI 2765 emit_opcode( cbuf, 0x03 ); 2766 emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg ); 2767 %} 2768 2769 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2770 // Basic idea: lo(result) = lo(src * y_lo) 2771 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2772 // IMUL $tmp,EDX,$src 2773 emit_opcode( cbuf, 0x6B ); 2774 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2775 emit_d8( cbuf, (int)$src$$constant ); 2776 // MOV EDX,$src 2777 emit_opcode(cbuf, 0xB8 + EDX_enc); 2778 emit_d32( cbuf, (int)$src$$constant ); 2779 // MUL EDX:EAX,EDX 2780 emit_opcode( cbuf, 0xF7 ); 2781 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2782 // ADD EDX,ESI 2783 emit_opcode( cbuf, 0x03 ); 2784 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2785 %} 2786 2787 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2788 // PUSH src1.hi 2789 emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) ); 2790 // PUSH src1.lo 2791 emit_opcode(cbuf, 0x50+$src1$$reg ); 2792 // PUSH src2.hi 2793 emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) ); 2794 // PUSH src2.lo 2795 emit_opcode(cbuf, 0x50+$src2$$reg ); 2796 // CALL directly to the runtime 2797 MacroAssembler _masm(&cbuf); 2798 cbuf.set_insts_mark(); 2799 emit_opcode(cbuf,0xE8); // Call into runtime 2800 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2801 __ post_call_nop(); 2802 // Restore stack 2803 emit_opcode(cbuf, 0x83); // add SP, #framesize 2804 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2805 emit_d8(cbuf, 4*4); 2806 %} 2807 2808 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2809 // PUSH src1.hi 2810 emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) ); 2811 // PUSH src1.lo 2812 emit_opcode(cbuf, 0x50+$src1$$reg ); 2813 // PUSH src2.hi 2814 emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) ); 2815 // PUSH src2.lo 2816 emit_opcode(cbuf, 0x50+$src2$$reg ); 2817 // CALL directly to the 
runtime 2818 MacroAssembler _masm(&cbuf); 2819 cbuf.set_insts_mark(); 2820 emit_opcode(cbuf,0xE8); // Call into runtime 2821 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2822 __ post_call_nop(); 2823 // Restore stack 2824 emit_opcode(cbuf, 0x83); // add SP, #framesize 2825 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2826 emit_d8(cbuf, 4*4); 2827 %} 2828 2829 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2830 // MOV $tmp,$src.lo 2831 emit_opcode(cbuf, 0x8B); 2832 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2833 // OR $tmp,$src.hi 2834 emit_opcode(cbuf, 0x0B); 2835 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg)); 2836 %} 2837 2838 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2839 // CMP $src1.lo,$src2.lo 2840 emit_opcode( cbuf, 0x3B ); 2841 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2842 // JNE,s skip 2843 emit_cc(cbuf, 0x70, 0x5); 2844 emit_d8(cbuf,2); 2845 // CMP $src1.hi,$src2.hi 2846 emit_opcode( cbuf, 0x3B ); 2847 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) ); 2848 %} 2849 2850 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2851 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2852 emit_opcode( cbuf, 0x3B ); 2853 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2854 // MOV $tmp,$src1.hi 2855 emit_opcode( cbuf, 0x8B ); 2856 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) ); 2857 // SBB $tmp,$src2.hi\t! 
Compute flags for long compare 2858 emit_opcode( cbuf, 0x1B ); 2859 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) ); 2860 %} 2861 2862 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2863 // XOR $tmp,$tmp 2864 emit_opcode(cbuf,0x33); // XOR 2865 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2866 // CMP $tmp,$src.lo 2867 emit_opcode( cbuf, 0x3B ); 2868 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2869 // SBB $tmp,$src.hi 2870 emit_opcode( cbuf, 0x1B ); 2871 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) ); 2872 %} 2873 2874 // Sniff, sniff... smells like Gnu Superoptimizer 2875 enc_class neg_long( eRegL dst ) %{ 2876 emit_opcode(cbuf,0xF7); // NEG hi 2877 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg)); 2878 emit_opcode(cbuf,0xF7); // NEG lo 2879 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2880 emit_opcode(cbuf,0x83); // SBB hi,0 2881 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg)); 2882 emit_d8 (cbuf,0 ); 2883 %} 2884 2885 enc_class enc_pop_rdx() %{ 2886 emit_opcode(cbuf,0x5A); 2887 %} 2888 2889 enc_class enc_rethrow() %{ 2890 MacroAssembler _masm(&cbuf); 2891 cbuf.set_insts_mark(); 2892 emit_opcode(cbuf, 0xE9); // jmp entry 2893 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2894 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2895 __ post_call_nop(); 2896 %} 2897 2898 2899 // Convert a double to an int. Java semantics require we do complex 2900 // manglelations in the corner cases. So we set the rounding mode to 2901 // 'zero', store the darned double down as an int, and reset the 2902 // rounding mode to 'nearest'. The hardware throws an exception which 2903 // patches up the correct value directly to the stack. 2904 enc_class DPR2I_encoding( regDPR src ) %{ 2905 // Flip to round-to-zero mode. We attempted to allow invalid-op 2906 // exceptions here, so that a NAN or other corner-case value will 2907 // thrown an exception (but normal values get converted at full speed). 
2908 // However, I2C adapters and other float-stack manglers leave pending 2909 // invalid-op exceptions hanging. We would have to clear them before 2910 // enabling them and that is more expensive than just testing for the 2911 // invalid value Intel stores down in the corner cases. 2912 emit_opcode(cbuf,0xD9); // FLDCW trunc 2913 emit_opcode(cbuf,0x2D); 2914 emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()); 2915 // Allocate a word 2916 emit_opcode(cbuf,0x83); // SUB ESP,4 2917 emit_opcode(cbuf,0xEC); 2918 emit_d8(cbuf,0x04); 2919 // Encoding assumes a double has been pushed into FPR0. 2920 // Store down the double as an int, popping the FPU stack 2921 emit_opcode(cbuf,0xDB); // FISTP [ESP] 2922 emit_opcode(cbuf,0x1C); 2923 emit_d8(cbuf,0x24); 2924 // Restore the rounding mode; mask the exception 2925 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2926 emit_opcode(cbuf,0x2D); 2927 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2928 ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24() 2929 : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std()); 2930 2931 // Load the converted int; adjust CPU stack 2932 emit_opcode(cbuf,0x58); // POP EAX 2933 emit_opcode(cbuf,0x3D); // CMP EAX,imm 2934 emit_d32 (cbuf,0x80000000); // 0x80000000 2935 emit_opcode(cbuf,0x75); // JNE around_slow_call 2936 emit_d8 (cbuf,0x07); // Size of slow_call 2937 // Push src onto stack slow-path 2938 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2939 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2940 // CALL directly to the runtime 2941 MacroAssembler _masm(&cbuf); 2942 cbuf.set_insts_mark(); 2943 emit_opcode(cbuf,0xE8); // Call into runtime 2944 emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2945 __ post_call_nop(); 2946 // Carry on here... 
2947 %} 2948 2949 enc_class DPR2L_encoding( regDPR src ) %{ 2950 emit_opcode(cbuf,0xD9); // FLDCW trunc 2951 emit_opcode(cbuf,0x2D); 2952 emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()); 2953 // Allocate a word 2954 emit_opcode(cbuf,0x83); // SUB ESP,8 2955 emit_opcode(cbuf,0xEC); 2956 emit_d8(cbuf,0x08); 2957 // Encoding assumes a double has been pushed into FPR0. 2958 // Store down the double as a long, popping the FPU stack 2959 emit_opcode(cbuf,0xDF); // FISTP [ESP] 2960 emit_opcode(cbuf,0x3C); 2961 emit_d8(cbuf,0x24); 2962 // Restore the rounding mode; mask the exception 2963 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2964 emit_opcode(cbuf,0x2D); 2965 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2966 ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24() 2967 : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std()); 2968 2969 // Load the converted int; adjust CPU stack 2970 emit_opcode(cbuf,0x58); // POP EAX 2971 emit_opcode(cbuf,0x5A); // POP EDX 2972 emit_opcode(cbuf,0x81); // CMP EDX,imm 2973 emit_d8 (cbuf,0xFA); // rdx 2974 emit_d32 (cbuf,0x80000000); // 0x80000000 2975 emit_opcode(cbuf,0x75); // JNE around_slow_call 2976 emit_d8 (cbuf,0x07+4); // Size of slow_call 2977 emit_opcode(cbuf,0x85); // TEST EAX,EAX 2978 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 2979 emit_opcode(cbuf,0x75); // JNE around_slow_call 2980 emit_d8 (cbuf,0x07); // Size of slow_call 2981 // Push src onto stack slow-path 2982 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2983 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2984 // CALL directly to the runtime 2985 MacroAssembler _masm(&cbuf); 2986 cbuf.set_insts_mark(); 2987 emit_opcode(cbuf,0xE8); // Call into runtime 2988 emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2989 __ post_call_nop(); 2990 // Carry on here... 
2991 %} 2992 2993 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 2994 // Operand was loaded from memory into fp ST (stack top) 2995 // FMUL ST,$src /* D8 C8+i */ 2996 emit_opcode(cbuf, 0xD8); 2997 emit_opcode(cbuf, 0xC8 + $src1$$reg); 2998 %} 2999 3000 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 3001 // FADDP ST,src2 /* D8 C0+i */ 3002 emit_opcode(cbuf, 0xD8); 3003 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3004 //could use FADDP src2,fpST /* DE C0+i */ 3005 %} 3006 3007 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 3008 // FADDP src2,ST /* DE C0+i */ 3009 emit_opcode(cbuf, 0xDE); 3010 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3011 %} 3012 3013 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 3014 // Operand has been loaded into fp ST (stack top) 3015 // FSUB ST,$src1 3016 emit_opcode(cbuf, 0xD8); 3017 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3018 3019 // FDIV 3020 emit_opcode(cbuf, 0xD8); 3021 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3022 %} 3023 3024 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3025 // Operand was loaded from memory into fp ST (stack top) 3026 // FADD ST,$src /* D8 C0+i */ 3027 emit_opcode(cbuf, 0xD8); 3028 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3029 3030 // FMUL ST,src2 /* D8 C*+i */ 3031 emit_opcode(cbuf, 0xD8); 3032 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3033 %} 3034 3035 3036 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3037 // Operand was loaded from memory into fp ST (stack top) 3038 // FADD ST,$src /* D8 C0+i */ 3039 emit_opcode(cbuf, 0xD8); 3040 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3041 3042 // FMULP src2,ST /* DE C8+i */ 3043 emit_opcode(cbuf, 0xDE); 3044 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3045 %} 3046 3047 // Atomically load the volatile long 3048 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3049 emit_opcode(cbuf,0xDF); 3050 int rm_byte_opcode = 0x05; 3051 int base = $mem$$base; 3052 int index = $mem$$index; 3053 int scale = $mem$$scale; 3054 int displace = $mem$$disp; 3055 relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); // disp-as-oop when working with static globals 3056 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3057 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3058 %} 3059 3060 // Volatile Store Long. Must be atomic, so move it into 3061 // the FP TOS and then do a 64-bit FIST. Has to probe the 3062 // target address before the store (for null-ptr checks) 3063 // so the memory operand is used twice in the encoding. 3064 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3065 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3066 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3067 emit_opcode(cbuf,0xDF); 3068 int rm_byte_opcode = 0x07; 3069 int base = $mem$$base; 3070 int index = $mem$$index; 3071 int scale = $mem$$scale; 3072 int displace = $mem$$disp; 3073 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3074 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3075 %} 3076 3077 %} 3078 3079 3080 //----------FRAME-------------------------------------------------------------- 3081 // Definition of frame structure and management information. 3082 // 3083 // S T A C K L A Y O U T Allocators stack-slot number 3084 // | (to get allocators register number 3085 // G Owned by | | v add OptoReg::stack0()) 3086 // r CALLER | | 3087 // o | +--------+ pad to even-align allocators stack-slot 3088 // w V | pad0 | numbers; owned by CALLER 3089 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned 3090 // h ^ | in | 5 3091 // | | args | 4 Holes in incoming args owned by SELF 3092 // | | | | 3 3093 // | | +--------+ 3094 // V | | old out| Empty on Intel, window on Sparc 3095 // | old |preserve| Must be even aligned. 3096 // | SP-+--------+----> Matcher::_old_SP, even aligned 3097 // | | in | 3 area for Intel ret address 3098 // Owned by |preserve| Empty on Sparc. 
3099 // SELF +--------+ 3100 // | | pad2 | 2 pad to align old SP 3101 // | +--------+ 1 3102 // | | locks | 0 3103 // | +--------+----> OptoReg::stack0(), even aligned 3104 // | | pad1 | 11 pad to align new SP 3105 // | +--------+ 3106 // | | | 10 3107 // | | spills | 9 spills 3108 // V | | 8 (pad0 slot for callee) 3109 // -----------+--------+----> Matcher::_out_arg_limit, unaligned 3110 // ^ | out | 7 3111 // | | args | 6 Holes in outgoing args owned by CALLEE 3112 // Owned by +--------+ 3113 // CALLEE | new out| 6 Empty on Intel, window on Sparc 3114 // | new |preserve| Must be even-aligned. 3115 // | SP-+--------+----> Matcher::_new_SP, even aligned 3116 // | | | 3117 // 3118 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is 3119 // known from SELF's arguments and the Java calling convention. 3120 // Region 6-7 is determined per call site. 3121 // Note 2: If the calling convention leaves holes in the incoming argument 3122 // area, those holes are owned by SELF. Holes in the outgoing area 3123 // are owned by the CALLEE. Holes should not be necessary in the 3124 // incoming area, as the Java calling convention is completely under 3125 // the control of the AD file. Doubles can be sorted and packed to 3126 // avoid holes. Holes in the outgoing arguments may be necessary for 3127 // varargs C calling conventions. 3128 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is 3129 // even aligned with pad0 as needed. 3130 // Region 6 is even aligned. Region 6-7 is NOT even aligned; 3131 // region 6-11 is even aligned; it may be padded out more so that 3132 // the region from SP to FP meets the minimum stack alignment. 3133 3134 frame %{ 3135 // These three registers define part of the calling convention 3136 // between compiled code and the interpreter. 
3137 inline_cache_reg(EAX); // Inline Cache Register 3138 3139 // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] 3140 cisc_spilling_operand_name(indOffset32); 3141 3142 // Number of stack slots consumed by locking an object 3143 sync_stack_slots(1); 3144 3145 // Compiled code's Frame Pointer 3146 frame_pointer(ESP); 3147 // Interpreter stores its frame pointer in a register which is 3148 // stored to the stack by I2CAdaptors. 3149 // I2CAdaptors convert from interpreted java to compiled java. 3150 interpreter_frame_pointer(EBP); 3151 3152 // Stack alignment requirement 3153 // Alignment size in bytes (128-bit -> 16 bytes) 3154 stack_alignment(StackAlignmentInBytes); 3155 3156 // Number of outgoing stack slots killed above the out_preserve_stack_slots 3157 // for calls to C. Supports the var-args backing area for register parms. 3158 varargs_C_out_slots_killed(0); 3159 3160 // The after-PROLOG location of the return address. Location of 3161 // return address specifies a type (REG or STACK) and a number 3162 // representing the register number (i.e. - use a register name) or 3163 // stack slot. 3164 // Ret Addr is on stack in slot 0 if no locks or verification or alignment. 
3165 // Otherwise, it is above the locks and verification slot and alignment word 3166 return_addr(STACK - 1 + 3167 align_up((Compile::current()->in_preserve_stack_slots() + 3168 Compile::current()->fixed_slots()), 3169 stack_alignment_in_slots())); 3170 3171 // Location of C & interpreter return values 3172 c_return_value %{ 3173 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3174 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3175 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3176 3177 // in SSE2+ mode we want to keep the FPU stack clean so pretend 3178 // that C functions return float and double results in XMM0. 3179 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3180 return OptoRegPair(XMM0b_num,XMM0_num); 3181 if( ideal_reg == Op_RegF && UseSSE>=2 ) 3182 return OptoRegPair(OptoReg::Bad,XMM0_num); 3183 3184 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3185 %} 3186 3187 // Location of return values 3188 return_value %{ 3189 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3190 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3191 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3192 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3193 return OptoRegPair(XMM0b_num,XMM0_num); 3194 if( ideal_reg == Op_RegF && UseSSE>=1 ) 3195 return OptoRegPair(OptoReg::Bad,XMM0_num); 3196 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3197 %} 3198 3199 %} 3200 3201 //----------ATTRIBUTES--------------------------------------------------------- 3202 //----------Operand Attributes------------------------------------------------- 3203 op_attrib op_cost(0); // Required cost attribute 3204 3205 //----------Instruction Attributes--------------------------------------------- 3206 ins_attrib 
ins_cost(100); // Required cost attribute 3207 ins_attrib ins_size(8); // Required size attribute (in bits) 3208 ins_attrib ins_short_branch(0); // Required flag: is this instruction a 3209 // non-matching short branch variant of some 3210 // long branch? 3211 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2) 3212 // specifies the alignment that some part of the instruction (not 3213 // necessarily the start) requires. If > 1, a compute_padding() 3214 // function must be provided for the instruction 3215 3216 //----------OPERANDS----------------------------------------------------------- 3217 // Operand definitions must precede instruction definitions for correct parsing 3218 // in the ADLC because operands constitute user defined types which are used in 3219 // instruction definitions. 3220 3221 //----------Simple Operands---------------------------------------------------- 3222 // Immediate Operands 3223 // Integer Immediate 3224 operand immI() %{ 3225 match(ConI); 3226 3227 op_cost(10); 3228 format %{ %} 3229 interface(CONST_INTER); 3230 %} 3231 3232 // Constant for test vs zero 3233 operand immI_0() %{ 3234 predicate(n->get_int() == 0); 3235 match(ConI); 3236 3237 op_cost(0); 3238 format %{ %} 3239 interface(CONST_INTER); 3240 %} 3241 3242 // Constant for increment 3243 operand immI_1() %{ 3244 predicate(n->get_int() == 1); 3245 match(ConI); 3246 3247 op_cost(0); 3248 format %{ %} 3249 interface(CONST_INTER); 3250 %} 3251 3252 // Constant for decrement 3253 operand immI_M1() %{ 3254 predicate(n->get_int() == -1); 3255 match(ConI); 3256 3257 op_cost(0); 3258 format %{ %} 3259 interface(CONST_INTER); 3260 %} 3261 3262 // Valid scale values for addressing modes 3263 operand immI2() %{ 3264 predicate(0 <= n->get_int() && (n->get_int() <= 3)); 3265 match(ConI); 3266 3267 format %{ %} 3268 interface(CONST_INTER); 3269 %} 3270 3271 operand immI8() %{ 3272 predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); 3273 match(ConI); 
3274 3275 op_cost(5); 3276 format %{ %} 3277 interface(CONST_INTER); 3278 %} 3279 3280 operand immU8() %{ 3281 predicate((0 <= n->get_int()) && (n->get_int() <= 255)); 3282 match(ConI); 3283 3284 op_cost(5); 3285 format %{ %} 3286 interface(CONST_INTER); 3287 %} 3288 3289 operand immI16() %{ 3290 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); 3291 match(ConI); 3292 3293 op_cost(10); 3294 format %{ %} 3295 interface(CONST_INTER); 3296 %} 3297 3298 // Int Immediate non-negative 3299 operand immU31() 3300 %{ 3301 predicate(n->get_int() >= 0); 3302 match(ConI); 3303 3304 op_cost(0); 3305 format %{ %} 3306 interface(CONST_INTER); 3307 %} 3308 3309 // Constant for long shifts 3310 operand immI_32() %{ 3311 predicate( n->get_int() == 32 ); 3312 match(ConI); 3313 3314 op_cost(0); 3315 format %{ %} 3316 interface(CONST_INTER); 3317 %} 3318 3319 operand immI_1_31() %{ 3320 predicate( n->get_int() >= 1 && n->get_int() <= 31 ); 3321 match(ConI); 3322 3323 op_cost(0); 3324 format %{ %} 3325 interface(CONST_INTER); 3326 %} 3327 3328 operand immI_32_63() %{ 3329 predicate( n->get_int() >= 32 && n->get_int() <= 63 ); 3330 match(ConI); 3331 op_cost(0); 3332 3333 format %{ %} 3334 interface(CONST_INTER); 3335 %} 3336 3337 operand immI_2() %{ 3338 predicate( n->get_int() == 2 ); 3339 match(ConI); 3340 3341 op_cost(0); 3342 format %{ %} 3343 interface(CONST_INTER); 3344 %} 3345 3346 operand immI_3() %{ 3347 predicate( n->get_int() == 3 ); 3348 match(ConI); 3349 3350 op_cost(0); 3351 format %{ %} 3352 interface(CONST_INTER); 3353 %} 3354 3355 operand immI_4() 3356 %{ 3357 predicate(n->get_int() == 4); 3358 match(ConI); 3359 3360 op_cost(0); 3361 format %{ %} 3362 interface(CONST_INTER); 3363 %} 3364 3365 operand immI_8() 3366 %{ 3367 predicate(n->get_int() == 8); 3368 match(ConI); 3369 3370 op_cost(0); 3371 format %{ %} 3372 interface(CONST_INTER); 3373 %} 3374 3375 // Pointer Immediate 3376 operand immP() %{ 3377 match(ConP); 3378 3379 op_cost(10); 3380 format %{ %} 
3381 interface(CONST_INTER); 3382 %} 3383 3384 // Null Pointer Immediate 3385 operand immP0() %{ 3386 predicate( n->get_ptr() == 0 ); 3387 match(ConP); 3388 op_cost(0); 3389 3390 format %{ %} 3391 interface(CONST_INTER); 3392 %} 3393 3394 // Long Immediate 3395 operand immL() %{ 3396 match(ConL); 3397 3398 op_cost(20); 3399 format %{ %} 3400 interface(CONST_INTER); 3401 %} 3402 3403 // Long Immediate zero 3404 operand immL0() %{ 3405 predicate( n->get_long() == 0L ); 3406 match(ConL); 3407 op_cost(0); 3408 3409 format %{ %} 3410 interface(CONST_INTER); 3411 %} 3412 3413 // Long Immediate zero 3414 operand immL_M1() %{ 3415 predicate( n->get_long() == -1L ); 3416 match(ConL); 3417 op_cost(0); 3418 3419 format %{ %} 3420 interface(CONST_INTER); 3421 %} 3422 3423 // Long immediate from 0 to 127. 3424 // Used for a shorter form of long mul by 10. 3425 operand immL_127() %{ 3426 predicate((0 <= n->get_long()) && (n->get_long() <= 127)); 3427 match(ConL); 3428 op_cost(0); 3429 3430 format %{ %} 3431 interface(CONST_INTER); 3432 %} 3433 3434 // Long Immediate: low 32-bit mask 3435 operand immL_32bits() %{ 3436 predicate(n->get_long() == 0xFFFFFFFFL); 3437 match(ConL); 3438 op_cost(0); 3439 3440 format %{ %} 3441 interface(CONST_INTER); 3442 %} 3443 3444 // Long Immediate: low 32-bit mask 3445 operand immL32() %{ 3446 predicate(n->get_long() == (int)(n->get_long())); 3447 match(ConL); 3448 op_cost(20); 3449 3450 format %{ %} 3451 interface(CONST_INTER); 3452 %} 3453 3454 //Double Immediate zero 3455 operand immDPR0() %{ 3456 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3457 // bug that generates code such that NaNs compare equal to 0.0 3458 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); 3459 match(ConD); 3460 3461 op_cost(5); 3462 format %{ %} 3463 interface(CONST_INTER); 3464 %} 3465 3466 // Double Immediate one 3467 operand immDPR1() %{ 3468 predicate( UseSSE<=1 && n->getd() == 1.0 ); 3469 match(ConD); 3470 3471 
  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (x87 form, UseSSE<=1)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 form, UseSSE>=2)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero (x87 form, UseSSE == 0)
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one (x87 form, UseSSE == 0)
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (x87 form, UseSSE == 0)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE form, UseSSE >= 1)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0 (bit pattern is exactly 0)
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Vector mask register (any of k1..k7)
operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Vector mask register k1 (fixed)
operand kReg_K1()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K1));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Vector mask register k2 (fixed)
operand kReg_K2()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K2));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
// Vector mask register k3 (fixed)
operand kReg_K3()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K3));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Vector mask register k4 (fixed)
operand kReg_K4()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K4));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Vector mask register k5 (fixed)
operand kReg_K5()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K5));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Vector mask register k6 (fixed)
operand kReg_K6()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K6));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
// Vector mask register k7 (fixed)
operand kReg_K7()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K7));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Register Operands
// Integer Register: any allocatable 32-bit integer register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register: the byte-addressable registers (EAX..EDX)
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register excluding EAX (no eAXRegI in the match list)
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding EAX and EDX (no eAXRegI/eDXRegI in the match list)
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding ECX (no eCXRegI in the match list)
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Pointer Register: any register, including non-allocatable ones
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register: any allocatable integer register
operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register: same register class as eRegP
operand rRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
// Pointer register excluding EBP; the high op_cost discourages its selection
// unless EBP must be avoided (see the windows95 note above this operand).
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX (no eAXRegP in the match list)
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX and EBX (no eAXRegP/eBXRegP in the match list)
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Long Register: a pair of 32-bit integer registers
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

// Fixed long pair EDX:EAX
operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

// Fixed long pair EBX:ECX
operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Fixed long pair EBP:EDI
operand eBDPRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebpd_reg));
  match(reg);

  format %{ "EBP:EDI" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand rFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// predicate(false): never selected directly by the matcher; instructions
// name this operand explicitly.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare (EQ/NE tests)
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare (LE/GT tests)
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}

operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}

operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands (x87 double, UseSSE < 2)
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

// Any x87 double register except FPR1
operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands (x87 single, UseSSE < 2)
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

operand legRegF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand vlRegF() %{
  constraint(ALLOC_IN_RC(float_reg_vl));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand legRegD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

operand vlRegD() %{
  constraint(ALLOC_IN_RC(double_reg_vl));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand: absolute address from a pointer constant
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand: [reg], no displacement
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand: [reg + imm8]
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand: [reg + imm32]
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// (pointer constant as the displacement, integer register as the base)
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" with out a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use it's address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.
// load-long support
// Address register restricted to ESI so it cannot overlap the long target.
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
// The first argument of each mapping is the bit-pattern placed into the
// opcode (see the register-definition header); the string is the condition
// mnemonic suffix used in assembly output.
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
// Conditions are swapped relative to cmpOp (less maps to "g", etc.) so the
// compare operands can be commuted.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable-sized
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention:  ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.
// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation
// (note: src is declared as a memory operand here)
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation (flags result)
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation (flags result)
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation (flags result)
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR
dst) %{ 4827 instruction_count(2); 4828 dst : S3(read); 4829 DECODE : S0(2); // any 2 decoders 4830 FPU : S3; 4831 %} 4832 4833 // Float reg-reg operation 4834 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 4835 instruction_count(2); 4836 dst : S4(write); 4837 src : S3(read); 4838 DECODE : S0(2); // any 2 decoders 4839 FPU : S3; 4840 %} 4841 4842 // Float reg-reg operation 4843 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 4844 instruction_count(3); 4845 dst : S4(write); 4846 src1 : S3(read); 4847 src2 : S3(read); 4848 DECODE : S0(3); // any 3 decoders 4849 FPU : S3(2); 4850 %} 4851 4852 // Float reg-reg operation 4853 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 4854 instruction_count(4); 4855 dst : S4(write); 4856 src1 : S3(read); 4857 src2 : S3(read); 4858 src3 : S3(read); 4859 DECODE : S0(4); // any 4 decoders 4860 FPU : S3(2); 4861 %} 4862 4863 // Float reg-reg operation 4864 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 4865 instruction_count(4); 4866 dst : S4(write); 4867 src1 : S3(read); 4868 src2 : S3(read); 4869 src3 : S3(read); 4870 DECODE : S1(3); // any 3 decoders 4871 D0 : S0; // Big decoder only 4872 FPU : S3(2); 4873 MEM : S3; 4874 %} 4875 4876 // Float reg-mem operation 4877 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 4878 instruction_count(2); 4879 dst : S5(write); 4880 mem : S3(read); 4881 D0 : S0; // big decoder only 4882 DECODE : S1; // any decoder for FPU POP 4883 FPU : S4; 4884 MEM : S3; // any mem 4885 %} 4886 4887 // Float reg-mem operation 4888 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 4889 instruction_count(3); 4890 dst : S5(write); 4891 src1 : S3(read); 4892 mem : S3(read); 4893 D0 : S0; // big decoder only 4894 DECODE : S1(2); // any decoder for FPU POP 4895 FPU : S4; 4896 MEM : S3; // any mem 4897 %} 4898 4899 // Float mem-reg operation 4900 pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 4901 
instruction_count(2); 4902 src : S5(read); 4903 mem : S3(read); 4904 DECODE : S0; // any decoder for FPU PUSH 4905 D0 : S1; // big decoder only 4906 FPU : S4; 4907 MEM : S3; // any mem 4908 %} 4909 4910 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4911 instruction_count(3); 4912 src1 : S3(read); 4913 src2 : S3(read); 4914 mem : S3(read); 4915 DECODE : S0(2); // any decoder for FPU PUSH 4916 D0 : S1; // big decoder only 4917 FPU : S4; 4918 MEM : S3; // any mem 4919 %} 4920 4921 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4922 instruction_count(3); 4923 src1 : S3(read); 4924 src2 : S3(read); 4925 mem : S4(read); 4926 DECODE : S0; // any decoder for FPU PUSH 4927 D0 : S0(2); // big decoder only 4928 FPU : S4; 4929 MEM : S3(2); // any mem 4930 %} 4931 4932 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4933 instruction_count(2); 4934 src1 : S3(read); 4935 dst : S4(read); 4936 D0 : S0(2); // big decoder only 4937 MEM : S3(2); // any mem 4938 %} 4939 4940 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4941 instruction_count(3); 4942 src1 : S3(read); 4943 src2 : S3(read); 4944 dst : S4(read); 4945 D0 : S0(3); // big decoder only 4946 FPU : S4; 4947 MEM : S3(3); // any mem 4948 %} 4949 4950 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4951 instruction_count(3); 4952 src1 : S4(read); 4953 mem : S4(read); 4954 DECODE : S0; // any decoder for FPU PUSH 4955 D0 : S0(2); // big decoder only 4956 FPU : S4; 4957 MEM : S3(2); // any mem 4958 %} 4959 4960 // Float load constant 4961 pipe_class fpu_reg_con(regDPR dst) %{ 4962 instruction_count(2); 4963 dst : S5(write); 4964 D0 : S0; // big decoder only for the load 4965 DECODE : S1; // any decoder for FPU POP 4966 FPU : S4; 4967 MEM : S3; // any mem 4968 %} 4969 4970 // Float load constant 4971 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 4972 instruction_count(3); 4973 dst : S5(write); 4974 src : S3(read); 4975 D0 : S0; // big decoder only for 
the load 4976 DECODE : S1(2); // any decoder for FPU POP 4977 FPU : S4; 4978 MEM : S3; // any mem 4979 %} 4980 4981 // UnConditional branch 4982 pipe_class pipe_jmp( label labl ) %{ 4983 single_instruction; 4984 BR : S3; 4985 %} 4986 4987 // Conditional branch 4988 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 4989 single_instruction; 4990 cr : S1(read); 4991 BR : S3; 4992 %} 4993 4994 // Allocation idiom 4995 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 4996 instruction_count(1); force_serialization; 4997 fixed_latency(6); 4998 heap_ptr : S3(read); 4999 DECODE : S0(3); 5000 D0 : S2; 5001 MEM : S3; 5002 ALU : S3(2); 5003 dst : S5(write); 5004 BR : S5; 5005 %} 5006 5007 // Generic big/slow expanded idiom 5008 pipe_class pipe_slow( ) %{ 5009 instruction_count(10); multiple_bundles; force_serialization; 5010 fixed_latency(100); 5011 D0 : S0(2); 5012 MEM : S3(2); 5013 %} 5014 5015 // The real do-nothing guy 5016 pipe_class empty( ) %{ 5017 instruction_count(0); 5018 %} 5019 5020 // Define the class for the Nop node 5021 define %{ 5022 MachNop = empty; 5023 %} 5024 5025 %} 5026 5027 //----------INSTRUCTIONS------------------------------------------------------- 5028 // 5029 // match -- States which machine-independent subtree may be replaced 5030 // by this instruction. 5031 // ins_cost -- The estimated cost of this instruction is used by instruction 5032 // selection to identify a minimum cost tree of machine 5033 // instructions that matches a tree of machine-independent 5034 // instructions. 5035 // format -- A string providing the disassembly for this instruction. 5036 // The value of an instruction's operand may be inserted 5037 // by referring to it with a '$' prefix. 5038 // opcode -- Three instruction opcodes may be provided. These are referred 5039 // to within an encode class as $primary, $secondary, and $tertiary 5040 // respectively. 
The primary opcode is commonly used to 5041 // indicate the type of machine instruction, while secondary 5042 // and tertiary are often used for prefix options or addressing 5043 // modes. 5044 // ins_encode -- A list of encode classes with parameters. The encode class 5045 // name must have been defined in an 'enc_class' specification 5046 // in the encode section of the architecture description. 5047 5048 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 5049 // Load Float 5050 instruct MoveF2LEG(legRegF dst, regF src) %{ 5051 match(Set dst src); 5052 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} 5053 ins_encode %{ 5054 ShouldNotReachHere(); 5055 %} 5056 ins_pipe( fpu_reg_reg ); 5057 %} 5058 5059 // Load Float 5060 instruct MoveLEG2F(regF dst, legRegF src) %{ 5061 match(Set dst src); 5062 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} 5063 ins_encode %{ 5064 ShouldNotReachHere(); 5065 %} 5066 ins_pipe( fpu_reg_reg ); 5067 %} 5068 5069 // Load Float 5070 instruct MoveF2VL(vlRegF dst, regF src) %{ 5071 match(Set dst src); 5072 format %{ "movss $dst,$src\t! load float (4 bytes)" %} 5073 ins_encode %{ 5074 ShouldNotReachHere(); 5075 %} 5076 ins_pipe( fpu_reg_reg ); 5077 %} 5078 5079 // Load Float 5080 instruct MoveVL2F(regF dst, vlRegF src) %{ 5081 match(Set dst src); 5082 format %{ "movss $dst,$src\t! 
load float (4 bytes)" %} 5083 ins_encode %{ 5084 ShouldNotReachHere(); 5085 %} 5086 ins_pipe( fpu_reg_reg ); 5087 %} 5088 5089 5090 5091 // Load Double 5092 instruct MoveD2LEG(legRegD dst, regD src) %{ 5093 match(Set dst src); 5094 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} 5095 ins_encode %{ 5096 ShouldNotReachHere(); 5097 %} 5098 ins_pipe( fpu_reg_reg ); 5099 %} 5100 5101 // Load Double 5102 instruct MoveLEG2D(regD dst, legRegD src) %{ 5103 match(Set dst src); 5104 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} 5105 ins_encode %{ 5106 ShouldNotReachHere(); 5107 %} 5108 ins_pipe( fpu_reg_reg ); 5109 %} 5110 5111 // Load Double 5112 instruct MoveD2VL(vlRegD dst, regD src) %{ 5113 match(Set dst src); 5114 format %{ "movsd $dst,$src\t! load double (8 bytes)" %} 5115 ins_encode %{ 5116 ShouldNotReachHere(); 5117 %} 5118 ins_pipe( fpu_reg_reg ); 5119 %} 5120 5121 // Load Double 5122 instruct MoveVL2D(regD dst, vlRegD src) %{ 5123 match(Set dst src); 5124 format %{ "movsd $dst,$src\t! 
load double (8 bytes)" %} 5125 ins_encode %{ 5126 ShouldNotReachHere(); 5127 %} 5128 ins_pipe( fpu_reg_reg ); 5129 %} 5130 5131 //----------BSWAP-Instruction-------------------------------------------------- 5132 instruct bytes_reverse_int(rRegI dst) %{ 5133 match(Set dst (ReverseBytesI dst)); 5134 5135 format %{ "BSWAP $dst" %} 5136 opcode(0x0F, 0xC8); 5137 ins_encode( OpcP, OpcSReg(dst) ); 5138 ins_pipe( ialu_reg ); 5139 %} 5140 5141 instruct bytes_reverse_long(eRegL dst) %{ 5142 match(Set dst (ReverseBytesL dst)); 5143 5144 format %{ "BSWAP $dst.lo\n\t" 5145 "BSWAP $dst.hi\n\t" 5146 "XCHG $dst.lo $dst.hi" %} 5147 5148 ins_cost(125); 5149 ins_encode( bswap_long_bytes(dst) ); 5150 ins_pipe( ialu_reg_reg); 5151 %} 5152 5153 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5154 match(Set dst (ReverseBytesUS dst)); 5155 effect(KILL cr); 5156 5157 format %{ "BSWAP $dst\n\t" 5158 "SHR $dst,16\n\t" %} 5159 ins_encode %{ 5160 __ bswapl($dst$$Register); 5161 __ shrl($dst$$Register, 16); 5162 %} 5163 ins_pipe( ialu_reg ); 5164 %} 5165 5166 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5167 match(Set dst (ReverseBytesS dst)); 5168 effect(KILL cr); 5169 5170 format %{ "BSWAP $dst\n\t" 5171 "SAR $dst,16\n\t" %} 5172 ins_encode %{ 5173 __ bswapl($dst$$Register); 5174 __ sarl($dst$$Register, 16); 5175 %} 5176 ins_pipe( ialu_reg ); 5177 %} 5178 5179 5180 //---------- Zeros Count Instructions ------------------------------------------ 5181 5182 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5183 predicate(UseCountLeadingZerosInstruction); 5184 match(Set dst (CountLeadingZerosI src)); 5185 effect(KILL cr); 5186 5187 format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %} 5188 ins_encode %{ 5189 __ lzcntl($dst$$Register, $src$$Register); 5190 %} 5191 ins_pipe(ialu_reg); 5192 %} 5193 5194 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5195 predicate(!UseCountLeadingZerosInstruction); 5196 match(Set dst 
(CountLeadingZerosI src)); 5197 effect(KILL cr); 5198 5199 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5200 "JNZ skip\n\t" 5201 "MOV $dst, -1\n" 5202 "skip:\n\t" 5203 "NEG $dst\n\t" 5204 "ADD $dst, 31" %} 5205 ins_encode %{ 5206 Register Rdst = $dst$$Register; 5207 Register Rsrc = $src$$Register; 5208 Label skip; 5209 __ bsrl(Rdst, Rsrc); 5210 __ jccb(Assembler::notZero, skip); 5211 __ movl(Rdst, -1); 5212 __ bind(skip); 5213 __ negl(Rdst); 5214 __ addl(Rdst, BitsPerInt - 1); 5215 %} 5216 ins_pipe(ialu_reg); 5217 %} 5218 5219 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5220 predicate(UseCountLeadingZerosInstruction); 5221 match(Set dst (CountLeadingZerosL src)); 5222 effect(TEMP dst, KILL cr); 5223 5224 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5225 "JNC done\n\t" 5226 "LZCNT $dst, $src.lo\n\t" 5227 "ADD $dst, 32\n" 5228 "done:" %} 5229 ins_encode %{ 5230 Register Rdst = $dst$$Register; 5231 Register Rsrc = $src$$Register; 5232 Label done; 5233 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5234 __ jccb(Assembler::carryClear, done); 5235 __ lzcntl(Rdst, Rsrc); 5236 __ addl(Rdst, BitsPerInt); 5237 __ bind(done); 5238 %} 5239 ins_pipe(ialu_reg); 5240 %} 5241 5242 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5243 predicate(!UseCountLeadingZerosInstruction); 5244 match(Set dst (CountLeadingZerosL src)); 5245 effect(TEMP dst, KILL cr); 5246 5247 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5248 "JZ msw_is_zero\n\t" 5249 "ADD $dst, 32\n\t" 5250 "JMP not_zero\n" 5251 "msw_is_zero:\n\t" 5252 "BSR $dst, $src.lo\n\t" 5253 "JNZ not_zero\n\t" 5254 "MOV $dst, -1\n" 5255 "not_zero:\n\t" 5256 "NEG $dst\n\t" 5257 "ADD $dst, 63\n" %} 5258 ins_encode %{ 5259 Register Rdst = $dst$$Register; 5260 Register Rsrc = $src$$Register; 5261 Label msw_is_zero; 5262 Label not_zero; 5263 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5264 __ jccb(Assembler::zero, msw_is_zero); 5265 __ addl(Rdst, BitsPerInt); 
5266 __ jmpb(not_zero); 5267 __ bind(msw_is_zero); 5268 __ bsrl(Rdst, Rsrc); 5269 __ jccb(Assembler::notZero, not_zero); 5270 __ movl(Rdst, -1); 5271 __ bind(not_zero); 5272 __ negl(Rdst); 5273 __ addl(Rdst, BitsPerLong - 1); 5274 %} 5275 ins_pipe(ialu_reg); 5276 %} 5277 5278 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5279 predicate(UseCountTrailingZerosInstruction); 5280 match(Set dst (CountTrailingZerosI src)); 5281 effect(KILL cr); 5282 5283 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5284 ins_encode %{ 5285 __ tzcntl($dst$$Register, $src$$Register); 5286 %} 5287 ins_pipe(ialu_reg); 5288 %} 5289 5290 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5291 predicate(!UseCountTrailingZerosInstruction); 5292 match(Set dst (CountTrailingZerosI src)); 5293 effect(KILL cr); 5294 5295 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5296 "JNZ done\n\t" 5297 "MOV $dst, 32\n" 5298 "done:" %} 5299 ins_encode %{ 5300 Register Rdst = $dst$$Register; 5301 Label done; 5302 __ bsfl(Rdst, $src$$Register); 5303 __ jccb(Assembler::notZero, done); 5304 __ movl(Rdst, BitsPerInt); 5305 __ bind(done); 5306 %} 5307 ins_pipe(ialu_reg); 5308 %} 5309 5310 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5311 predicate(UseCountTrailingZerosInstruction); 5312 match(Set dst (CountTrailingZerosL src)); 5313 effect(TEMP dst, KILL cr); 5314 5315 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5316 "JNC done\n\t" 5317 "TZCNT $dst, $src.hi\n\t" 5318 "ADD $dst, 32\n" 5319 "done:" %} 5320 ins_encode %{ 5321 Register Rdst = $dst$$Register; 5322 Register Rsrc = $src$$Register; 5323 Label done; 5324 __ tzcntl(Rdst, Rsrc); 5325 __ jccb(Assembler::carryClear, done); 5326 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5327 __ addl(Rdst, BitsPerInt); 5328 __ bind(done); 5329 %} 5330 ins_pipe(ialu_reg); 5331 %} 5332 5333 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5334 
predicate(!UseCountTrailingZerosInstruction); 5335 match(Set dst (CountTrailingZerosL src)); 5336 effect(TEMP dst, KILL cr); 5337 5338 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5339 "JNZ done\n\t" 5340 "BSF $dst, $src.hi\n\t" 5341 "JNZ msw_not_zero\n\t" 5342 "MOV $dst, 32\n" 5343 "msw_not_zero:\n\t" 5344 "ADD $dst, 32\n" 5345 "done:" %} 5346 ins_encode %{ 5347 Register Rdst = $dst$$Register; 5348 Register Rsrc = $src$$Register; 5349 Label msw_not_zero; 5350 Label done; 5351 __ bsfl(Rdst, Rsrc); 5352 __ jccb(Assembler::notZero, done); 5353 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5354 __ jccb(Assembler::notZero, msw_not_zero); 5355 __ movl(Rdst, BitsPerInt); 5356 __ bind(msw_not_zero); 5357 __ addl(Rdst, BitsPerInt); 5358 __ bind(done); 5359 %} 5360 ins_pipe(ialu_reg); 5361 %} 5362 5363 5364 //---------- Population Count Instructions ------------------------------------- 5365 5366 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5367 predicate(UsePopCountInstruction); 5368 match(Set dst (PopCountI src)); 5369 effect(KILL cr); 5370 5371 format %{ "POPCNT $dst, $src" %} 5372 ins_encode %{ 5373 __ popcntl($dst$$Register, $src$$Register); 5374 %} 5375 ins_pipe(ialu_reg); 5376 %} 5377 5378 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5379 predicate(UsePopCountInstruction); 5380 match(Set dst (PopCountI (LoadI mem))); 5381 effect(KILL cr); 5382 5383 format %{ "POPCNT $dst, $mem" %} 5384 ins_encode %{ 5385 __ popcntl($dst$$Register, $mem$$Address); 5386 %} 5387 ins_pipe(ialu_reg); 5388 %} 5389 5390 // Note: Long.bitCount(long) returns an int. 
5391 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 5392 predicate(UsePopCountInstruction); 5393 match(Set dst (PopCountL src)); 5394 effect(KILL cr, TEMP tmp, TEMP dst); 5395 5396 format %{ "POPCNT $dst, $src.lo\n\t" 5397 "POPCNT $tmp, $src.hi\n\t" 5398 "ADD $dst, $tmp" %} 5399 ins_encode %{ 5400 __ popcntl($dst$$Register, $src$$Register); 5401 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 5402 __ addl($dst$$Register, $tmp$$Register); 5403 %} 5404 ins_pipe(ialu_reg); 5405 %} 5406 5407 // Note: Long.bitCount(long) returns an int. 5408 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ 5409 predicate(UsePopCountInstruction); 5410 match(Set dst (PopCountL (LoadL mem))); 5411 effect(KILL cr, TEMP tmp, TEMP dst); 5412 5413 format %{ "POPCNT $dst, $mem\n\t" 5414 "POPCNT $tmp, $mem+4\n\t" 5415 "ADD $dst, $tmp" %} 5416 ins_encode %{ 5417 //__ popcntl($dst$$Register, $mem$$Address$$first); 5418 //__ popcntl($tmp$$Register, $mem$$Address$$second); 5419 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); 5420 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); 5421 __ addl($dst$$Register, $tmp$$Register); 5422 %} 5423 ins_pipe(ialu_reg); 5424 %} 5425 5426 5427 //----------Load/Store/Move Instructions--------------------------------------- 5428 //----------Load Instructions-------------------------------------------------- 5429 // Load Byte (8bit signed) 5430 instruct loadB(xRegI dst, memory mem) %{ 5431 match(Set dst (LoadB mem)); 5432 5433 ins_cost(125); 5434 format %{ "MOVSX8 $dst,$mem\t# byte" %} 5435 5436 ins_encode %{ 5437 __ movsbl($dst$$Register, $mem$$Address); 5438 %} 5439 5440 ins_pipe(ialu_reg_mem); 5441 %} 5442 5443 // Load Byte (8bit signed) into Long Register 5444 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5445 match(Set dst (ConvI2L (LoadB mem))); 5446 effect(KILL 
cr); 5447 5448 ins_cost(375); 5449 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" 5450 "MOV $dst.hi,$dst.lo\n\t" 5451 "SAR $dst.hi,7" %} 5452 5453 ins_encode %{ 5454 __ movsbl($dst$$Register, $mem$$Address); 5455 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5456 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. 5457 %} 5458 5459 ins_pipe(ialu_reg_mem); 5460 %} 5461 5462 // Load Unsigned Byte (8bit UNsigned) 5463 instruct loadUB(xRegI dst, memory mem) %{ 5464 match(Set dst (LoadUB mem)); 5465 5466 ins_cost(125); 5467 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} 5468 5469 ins_encode %{ 5470 __ movzbl($dst$$Register, $mem$$Address); 5471 %} 5472 5473 ins_pipe(ialu_reg_mem); 5474 %} 5475 5476 // Load Unsigned Byte (8 bit UNsigned) into Long Register 5477 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5478 match(Set dst (ConvI2L (LoadUB mem))); 5479 effect(KILL cr); 5480 5481 ins_cost(250); 5482 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" 5483 "XOR $dst.hi,$dst.hi" %} 5484 5485 ins_encode %{ 5486 Register Rdst = $dst$$Register; 5487 __ movzbl(Rdst, $mem$$Address); 5488 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5489 %} 5490 5491 ins_pipe(ialu_reg_mem); 5492 %} 5493 5494 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register 5495 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5496 match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); 5497 effect(KILL cr); 5498 5499 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" 5500 "XOR $dst.hi,$dst.hi\n\t" 5501 "AND $dst.lo,right_n_bits($mask, 8)" %} 5502 ins_encode %{ 5503 Register Rdst = $dst$$Register; 5504 __ movzbl(Rdst, $mem$$Address); 5505 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5506 __ andl(Rdst, $mask$$constant & right_n_bits(8)); 5507 %} 5508 ins_pipe(ialu_reg_mem); 5509 %} 5510 5511 // Load Short (16bit signed) 5512 instruct loadS(rRegI 
dst, memory mem) %{ 5513 match(Set dst (LoadS mem)); 5514 5515 ins_cost(125); 5516 format %{ "MOVSX $dst,$mem\t# short" %} 5517 5518 ins_encode %{ 5519 __ movswl($dst$$Register, $mem$$Address); 5520 %} 5521 5522 ins_pipe(ialu_reg_mem); 5523 %} 5524 5525 // Load Short (16 bit signed) to Byte (8 bit signed) 5526 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5527 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5528 5529 ins_cost(125); 5530 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5531 ins_encode %{ 5532 __ movsbl($dst$$Register, $mem$$Address); 5533 %} 5534 ins_pipe(ialu_reg_mem); 5535 %} 5536 5537 // Load Short (16bit signed) into Long Register 5538 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5539 match(Set dst (ConvI2L (LoadS mem))); 5540 effect(KILL cr); 5541 5542 ins_cost(375); 5543 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5544 "MOV $dst.hi,$dst.lo\n\t" 5545 "SAR $dst.hi,15" %} 5546 5547 ins_encode %{ 5548 __ movswl($dst$$Register, $mem$$Address); 5549 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5550 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 
5551 %} 5552 5553 ins_pipe(ialu_reg_mem); 5554 %} 5555 5556 // Load Unsigned Short/Char (16bit unsigned) 5557 instruct loadUS(rRegI dst, memory mem) %{ 5558 match(Set dst (LoadUS mem)); 5559 5560 ins_cost(125); 5561 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5562 5563 ins_encode %{ 5564 __ movzwl($dst$$Register, $mem$$Address); 5565 %} 5566 5567 ins_pipe(ialu_reg_mem); 5568 %} 5569 5570 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5571 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5572 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5573 5574 ins_cost(125); 5575 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5576 ins_encode %{ 5577 __ movsbl($dst$$Register, $mem$$Address); 5578 %} 5579 ins_pipe(ialu_reg_mem); 5580 %} 5581 5582 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5583 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5584 match(Set dst (ConvI2L (LoadUS mem))); 5585 effect(KILL cr); 5586 5587 ins_cost(250); 5588 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5589 "XOR $dst.hi,$dst.hi" %} 5590 5591 ins_encode %{ 5592 __ movzwl($dst$$Register, $mem$$Address); 5593 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5594 %} 5595 5596 ins_pipe(ialu_reg_mem); 5597 %} 5598 5599 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5600 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5601 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5602 effect(KILL cr); 5603 5604 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5605 "XOR $dst.hi,$dst.hi" %} 5606 ins_encode %{ 5607 Register Rdst = $dst$$Register; 5608 __ movzbl(Rdst, $mem$$Address); 5609 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5610 %} 5611 ins_pipe(ialu_reg_mem); 5612 %} 5613 5614 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5615 instruct 
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5616 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5617 effect(KILL cr); 5618 5619 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5620 "XOR $dst.hi,$dst.hi\n\t" 5621 "AND $dst.lo,right_n_bits($mask, 16)" %} 5622 ins_encode %{ 5623 Register Rdst = $dst$$Register; 5624 __ movzwl(Rdst, $mem$$Address); 5625 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5626 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5627 %} 5628 ins_pipe(ialu_reg_mem); 5629 %} 5630 5631 // Load Integer 5632 instruct loadI(rRegI dst, memory mem) %{ 5633 match(Set dst (LoadI mem)); 5634 5635 ins_cost(125); 5636 format %{ "MOV $dst,$mem\t# int" %} 5637 5638 ins_encode %{ 5639 __ movl($dst$$Register, $mem$$Address); 5640 %} 5641 5642 ins_pipe(ialu_reg_mem); 5643 %} 5644 5645 // Load Integer (32 bit signed) to Byte (8 bit signed) 5646 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5647 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5648 5649 ins_cost(125); 5650 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5651 ins_encode %{ 5652 __ movsbl($dst$$Register, $mem$$Address); 5653 %} 5654 ins_pipe(ialu_reg_mem); 5655 %} 5656 5657 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5658 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5659 match(Set dst (AndI (LoadI mem) mask)); 5660 5661 ins_cost(125); 5662 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5663 ins_encode %{ 5664 __ movzbl($dst$$Register, $mem$$Address); 5665 %} 5666 ins_pipe(ialu_reg_mem); 5667 %} 5668 5669 // Load Integer (32 bit signed) to Short (16 bit signed) 5670 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5671 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5672 5673 ins_cost(125); 5674 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5675 ins_encode %{ 5676 __ movswl($dst$$Register, $mem$$Address); 5677 %} 5678 ins_pipe(ialu_reg_mem); 5679 
%} 5680 5681 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5682 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5683 match(Set dst (AndI (LoadI mem) mask)); 5684 5685 ins_cost(125); 5686 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5687 ins_encode %{ 5688 __ movzwl($dst$$Register, $mem$$Address); 5689 %} 5690 ins_pipe(ialu_reg_mem); 5691 %} 5692 5693 // Load Integer into Long Register 5694 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5695 match(Set dst (ConvI2L (LoadI mem))); 5696 effect(KILL cr); 5697 5698 ins_cost(375); 5699 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5700 "MOV $dst.hi,$dst.lo\n\t" 5701 "SAR $dst.hi,31" %} 5702 5703 ins_encode %{ 5704 __ movl($dst$$Register, $mem$$Address); 5705 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5706 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5707 %} 5708 5709 ins_pipe(ialu_reg_mem); 5710 %} 5711 5712 // Load Integer with mask 0xFF into Long Register 5713 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5714 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5715 effect(KILL cr); 5716 5717 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5718 "XOR $dst.hi,$dst.hi" %} 5719 ins_encode %{ 5720 Register Rdst = $dst$$Register; 5721 __ movzbl(Rdst, $mem$$Address); 5722 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5723 %} 5724 ins_pipe(ialu_reg_mem); 5725 %} 5726 5727 // Load Integer with mask 0xFFFF into Long Register 5728 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5729 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5730 effect(KILL cr); 5731 5732 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5733 "XOR $dst.hi,$dst.hi" %} 5734 ins_encode %{ 5735 Register Rdst = $dst$$Register; 5736 __ movzwl(Rdst, $mem$$Address); 5737 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5738 %} 5739 ins_pipe(ialu_reg_mem); 
5740 %} 5741 5742 // Load Integer with 31-bit mask into Long Register 5743 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{ 5744 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5745 effect(KILL cr); 5746 5747 format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t" 5748 "XOR $dst.hi,$dst.hi\n\t" 5749 "AND $dst.lo,$mask" %} 5750 ins_encode %{ 5751 Register Rdst = $dst$$Register; 5752 __ movl(Rdst, $mem$$Address); 5753 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5754 __ andl(Rdst, $mask$$constant); 5755 %} 5756 ins_pipe(ialu_reg_mem); 5757 %} 5758 5759 // Load Unsigned Integer into Long Register 5760 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{ 5761 match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); 5762 effect(KILL cr); 5763 5764 ins_cost(250); 5765 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" 5766 "XOR $dst.hi,$dst.hi" %} 5767 5768 ins_encode %{ 5769 __ movl($dst$$Register, $mem$$Address); 5770 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5771 %} 5772 5773 ins_pipe(ialu_reg_mem); 5774 %} 5775 5776 // Load Long. Cannot clobber address while loading, so restrict address 5777 // register to ESI 5778 instruct loadL(eRegL dst, load_long_memory mem) %{ 5779 predicate(!((LoadLNode*)n)->require_atomic_access()); 5780 match(Set dst (LoadL mem)); 5781 5782 ins_cost(250); 5783 format %{ "MOV $dst.lo,$mem\t# long\n\t" 5784 "MOV $dst.hi,$mem+4" %} 5785 5786 ins_encode %{ 5787 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); 5788 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); 5789 __ movl($dst$$Register, Amemlo); 5790 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); 5791 %} 5792 5793 ins_pipe(ialu_reg_long_mem); 5794 %} 5795 5796 // Volatile Load Long. Must be atomic, so do 64-bit FILD 5797 // then store it down to the stack and reload on the int 5798 // side. 
5799 instruct loadL_volatile(stackSlotL dst, memory mem) %{ 5800 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); 5801 match(Set dst (LoadL mem)); 5802 5803 ins_cost(200); 5804 format %{ "FILD $mem\t# Atomic volatile long load\n\t" 5805 "FISTp $dst" %} 5806 ins_encode(enc_loadL_volatile(mem,dst)); 5807 ins_pipe( fpu_reg_mem ); 5808 %} 5809 5810 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ 5811 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5812 match(Set dst (LoadL mem)); 5813 effect(TEMP tmp); 5814 ins_cost(180); 5815 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5816 "MOVSD $dst,$tmp" %} 5817 ins_encode %{ 5818 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5819 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 5820 %} 5821 ins_pipe( pipe_slow ); 5822 %} 5823 5824 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ 5825 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5826 match(Set dst (LoadL mem)); 5827 effect(TEMP tmp); 5828 ins_cost(160); 5829 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5830 "MOVD $dst.lo,$tmp\n\t" 5831 "PSRLQ $tmp,32\n\t" 5832 "MOVD $dst.hi,$tmp" %} 5833 ins_encode %{ 5834 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5835 __ movdl($dst$$Register, $tmp$$XMMRegister); 5836 __ psrlq($tmp$$XMMRegister, 32); 5837 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 5838 %} 5839 ins_pipe( pipe_slow ); 5840 %} 5841 5842 // Load Range 5843 instruct loadRange(rRegI dst, memory mem) %{ 5844 match(Set dst (LoadRange mem)); 5845 5846 ins_cost(125); 5847 format %{ "MOV $dst,$mem" %} 5848 opcode(0x8B); 5849 ins_encode( OpcP, RegMem(dst,mem)); 5850 ins_pipe( ialu_reg_mem ); 5851 %} 5852 5853 5854 // Load Pointer 5855 instruct loadP(eRegP dst, memory mem) %{ 5856 match(Set dst (LoadP mem)); 5857 5858 ins_cost(125); 5859 format %{ "MOV $dst,$mem" %} 5860 opcode(0x8B); 5861 ins_encode( OpcP, RegMem(dst,mem)); 5862 ins_pipe( 
ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double onto the x87 stack (SSE2 unavailable).
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Double to XMM when clearing the upper half is not wanted
// (format shows the MOVLPD form selected under !UseXmmLoadAndClearUpper).
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float onto the x87 stack (SSE disabled).
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address -- one LEA variant per addressing-mode operand.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero -- XOR of the register with itself is shorter than
// MOV with a zero immediate, but it clobbers the flags (KILL cr).
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);  /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

// Load pointer constant.
instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);  /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant as two 32-bit immediate moves (low then high word).
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load long constant zero -- XOR both halves (clobbers flags, hence KILL cr).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// Float 0.0 via FLDZ (no constant-table load needed).
// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// Float 1.0 via FLD1 (no constant-table load needed).
// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
// SSE float constant, loaded from the constant table.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// SSE float 0.0 -- XORPS of the register with itself, no memory access.
// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// x87 double constant, loaded from the constant table.
// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// x87 double 0.0 via FLDZ.
// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// x87 double 1.0 via FLD1.
// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// SSE2 double constant, loaded from the constant table.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// SSE2 double 0.0 -- XORPD of the register with itself, no memory access.
// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load long stack slot as two 32-bit moves.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load float stack slot onto the x87 stack.
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load double stack slot onto the x87 stack.
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).

// No prefetch on non-SSE targets: emits nothing (size 0, cost 0).
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// AllocatePrefetchInstr selects which prefetch flavor to emit (3 = PREFETCHW).
instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short -- 0x66 operand-size prefix turns the 32-bit MOV into 16-bit.
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long as two 32-bit moves (non-atomic only, per the predicate).
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer -- only the low word of the long is stored.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.
// Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 atomic volatile long store from a stack slot: the CMP probes the
// target address (implicit null check, kills cr), then one 64-bit MOVSD
// writes the value atomically.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// As above, but the source is an integer register pair: the two halves are
// moved into XMM registers and combined with PUNPCKLDQ before the store.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister,
                 $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate (gated by the UseStoreImmI16 tuning flag).
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate -- same byte-store encoding as storeImmB,
// but matches the dedicated StoreCM card-mark node.
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double from the x87 top-of-stack (SSE2 unavailable).
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86 -- so the RoundDouble can be folded
// into the plain store.
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// Store Float from the x87 top-of-stack (SSE disabled).
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86 -- fold the RoundFloat into the store.
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86 -- the single-precision store itself
// performs the double-to-float narrowing, so ConvD2F folds away.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot -- two 32-bit moves, low then high word.
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

// Acquire barrier: no instruction needed on x86 (loads are not reordered
// with other loads), so the encoding is empty.
instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Release barrier: also empty on x86 (stores are not reordered with
// other stores).
instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full StoreLoad barrier: emitted as a locked ADD to the stack (kills cr).
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elide the barrier when the matcher proves a StoreLoad barrier already
// follows the store (post_store_load_barrier).
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------

// CastX2P with source and destination pinned to EAX: a no-op encoding.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move emulated with a short branch for CPUs without CMOV.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned-compare variant of the branch-emulated cmove above.
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Real CMOV (0F 40+cc) when the CPU supports it.
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Carry-flag-only unsigned compare variant, expanded to cmovI_regU.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move with a memory source.
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move, unsigned compare, memory source.
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move of a pointer.
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP
// is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move of a pointer, unsigned compare.
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move of an x87 double using FCMOV (unsigned condition codes).
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move of an x87 float using FCMOV (unsigned condition codes).
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed-compare form is emulated with an inverted branch around the move.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2 -- emulate with a branch around a MOVSS.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2 -- emulate with a branch around a MOVSD.
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
__ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// Carry-flag variant: reuses the unsigned encoding via expand.
instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Conditional move of a long: two 32-bit CMOVs, one for each half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of +1: single-byte INC (0x40 + reg) when UseIncDec is enabled.
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand integer add via LEA. LEA does not write EFLAGS, which is why
// there is no eFlagsReg effect on these two rules.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add of -1: single-byte DEC (0x48 + reg) when UseIncDec is enabled.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write add directly in memory (load+add+store folded).
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Increment memory in place (add of constant +1).
instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// Decrement memory in place (add of constant -1).
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// The Cast* nodes below are compiler bookkeeping only: they pin a type or
// value in the ideal graph and emit no machine code (empty encoding).
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castLL( eRegL dst ) %{
  match(Set dst (CastLL dst));
  format %{ "#castLL of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castFF( regF dst ) %{
  predicate(UseSSE >= 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castDD( regD dst ) %{
  predicate(UseSSE >= 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castFF_PR( regFPR dst ) %{
  predicate(UseSSE < 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castDD_PR( regDPR dst ) %{
  predicate(UseSSE < 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// CompareAndSwap* produce a boolean in $res: the format shows ZF from CMPXCHG
// being turned into 0/1 (enc_flags_ne_to_boolean). oldval is KILLed because
// CMPXCHG writes the fixed accumulator register(s) on failure.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndExchange* return the witnessed value: CMPXCHG leaves it in the
// accumulator, which is also the oldval operand, so no boolean conversion.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAdd whose result is unused: a plain locked ADD suffices (no XADD).
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDS [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDS [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndSet*: XCHG with a memory operand is implicitly locked on x86 and does
// not write EFLAGS, so no LOCK prefix and no flags effect below.
// Important to match to xRegI: only 8-bit regs.
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Negation matched as (0 - dst).
instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit long constant into EDX:EAX, low word only.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Walk the matched subtree (_kids) to require that the long multiplicand is
  // a constant that fits in a signed 32-bit value.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// MulAddS2I: expanded into two integer multiplies and an add.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
            mulI_eReg(src2, src3, cr);
            addI_eReg(dst, src2, cr); %}
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  // hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register.
// The format's CMP EAX,0x80000000 / CMP ECX,-1 sequence special-cases the
// min_jint / -1 combination so IDIV is skipped for it (emitted by cdq_enc).
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long: delegated to the runtime via a call.
instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (DivL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register: remainder is left in EDX by IDIV.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (ModL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
7900 // convert value to positive to use unsigned division 7901 __ lneg($dst$$Register, $tmp2$$Register); 7902 __ divl($tmp$$Register); 7903 __ xchgl($dst$$Register, $tmp2$$Register); 7904 __ divl($tmp$$Register); 7905 // revert result back to negative 7906 __ lneg($tmp2$$Register, $dst$$Register); 7907 __ jmpb(Ldone); 7908 7909 __ bind(Lpos); 7910 __ divl($tmp$$Register); // Use unsigned division 7911 __ xchgl($dst$$Register, $tmp2$$Register); 7912 // Fallthrow for final divide, tmp2 has 32 bit hi result 7913 7914 __ bind(Lfast); 7915 // fast path: src is positive 7916 __ divl($tmp$$Register); // Use unsigned division 7917 7918 __ bind(Ldone); 7919 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7920 if (con < 0) { 7921 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7922 } 7923 %} 7924 ins_pipe( pipe_slow ); 7925 %} 7926 7927 // Remainder Register Long (remainder fit into 32 bits) 7928 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7929 match(Set dst (ModL dst imm)); 7930 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7931 ins_cost(1000); 7932 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7933 "CMP $tmp,EDX\n\t" 7934 "JA,s fast\n\t" 7935 "MOV $tmp2,EAX\n\t" 7936 "MOV EAX,EDX\n\t" 7937 "MOV EDX,0\n\t" 7938 "JLE,s pos\n\t" 7939 "LNEG EAX : $tmp2\n\t" 7940 "DIV $tmp # unsigned division\n\t" 7941 "MOV EAX,$tmp2\n\t" 7942 "DIV $tmp\n\t" 7943 "NEG EDX\n\t" 7944 "JMP,s done\n" 7945 "pos:\n\t" 7946 "DIV $tmp\n\t" 7947 "MOV EAX,$tmp2\n" 7948 "fast:\n\t" 7949 "DIV $tmp\n" 7950 "done:\n\t" 7951 "MOV EAX,EDX\n\t" 7952 "SAR EDX,31\n\t" %} 7953 ins_encode %{ 7954 int con = (int)$imm$$constant; 7955 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7956 int pcon = (con > 0) ? 
con : -con; 7957 Label Lfast, Lpos, Ldone; 7958 7959 __ movl($tmp$$Register, pcon); 7960 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7961 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7962 7963 __ movl($tmp2$$Register, $dst$$Register); // save 7964 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7965 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7966 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7967 7968 // Negative dividend. 7969 // convert value to positive to use unsigned division 7970 __ lneg($dst$$Register, $tmp2$$Register); 7971 __ divl($tmp$$Register); 7972 __ movl($dst$$Register, $tmp2$$Register); 7973 __ divl($tmp$$Register); 7974 // revert remainder back to negative 7975 __ negl(HIGH_FROM_LOW($dst$$Register)); 7976 __ jmpb(Ldone); 7977 7978 __ bind(Lpos); 7979 __ divl($tmp$$Register); 7980 __ movl($dst$$Register, $tmp2$$Register); 7981 7982 __ bind(Lfast); 7983 // fast path: src is positive 7984 __ divl($tmp$$Register); 7985 7986 __ bind(Ldone); 7987 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7988 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 7989 7990 %} 7991 ins_pipe( pipe_slow ); 7992 %} 7993 7994 // Integer Shift Instructions 7995 // Shift Left by one 7996 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 7997 match(Set dst (LShiftI dst shift)); 7998 effect(KILL cr); 7999 8000 size(2); 8001 format %{ "SHL $dst,$shift" %} 8002 opcode(0xD1, 0x4); /* D1 /4 */ 8003 ins_encode( OpcP, RegOpc( dst ) ); 8004 ins_pipe( ialu_reg ); 8005 %} 8006 8007 // Shift Left by 8-bit immediate 8008 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8009 match(Set dst (LShiftI dst shift)); 8010 effect(KILL cr); 8011 8012 size(3); 8013 format %{ "SHL $dst,$shift" %} 8014 opcode(0xC1, 0x4); /* C1 /4 ib */ 8015 ins_encode( RegOpcImm( dst, shift) ); 8016 ins_pipe( ialu_reg ); 8017 %} 8018 8019 // Shift Left by variable 8020 instruct 
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
// Memory form: read-modify-write a StoreI of a shifted LoadI of the same
// address.
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
// Shift count must be in CL (eCXRegI), as required by the D3 /r encodings.
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
// Matched as a single MOVSX byte->int (sign extension).
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
8128 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{ 8129 match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); 8130 8131 size(3); 8132 format %{ "MOVSX $dst,$src :16" %} 8133 ins_encode %{ 8134 __ movswl($dst$$Register, $src$$Register); 8135 %} 8136 ins_pipe(ialu_reg_reg); 8137 %} 8138 8139 8140 // Logical Shift Right by variable 8141 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8142 match(Set dst (URShiftI dst shift)); 8143 effect(KILL cr); 8144 8145 size(2); 8146 format %{ "SHR $dst,$shift" %} 8147 opcode(0xD3, 0x5); /* D3 /5 */ 8148 ins_encode( OpcP, RegOpc( dst ) ); 8149 ins_pipe( ialu_reg_reg ); 8150 %} 8151 8152 8153 //----------Logical Instructions----------------------------------------------- 8154 //----------Integer Logical Instructions--------------------------------------- 8155 // And Instructions 8156 // And Register with Register 8157 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8158 match(Set dst (AndI dst src)); 8159 effect(KILL cr); 8160 8161 size(2); 8162 format %{ "AND $dst,$src" %} 8163 opcode(0x23); 8164 ins_encode( OpcP, RegReg( dst, src) ); 8165 ins_pipe( ialu_reg_reg ); 8166 %} 8167 8168 // And Register with Immediate 8169 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8170 match(Set dst (AndI dst src)); 8171 effect(KILL cr); 8172 8173 format %{ "AND $dst,$src" %} 8174 opcode(0x81,0x04); /* Opcode 81 /4 */ 8175 // ins_encode( RegImm( dst, src) ); 8176 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8177 ins_pipe( ialu_reg ); 8178 %} 8179 8180 // And Register with Memory 8181 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8182 match(Set dst (AndI dst (LoadI src))); 8183 effect(KILL cr); 8184 8185 ins_cost(150); 8186 format %{ "AND $dst,$src" %} 8187 opcode(0x23); 8188 ins_encode( OpcP, RegMem( dst, src) ); 8189 ins_pipe( ialu_reg_mem ); 8190 %} 8191 8192 // And Memory with Register 8193 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8194 match(Set dst 
(StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: matches (~src1) & src2, expressed in the ideal graph as
// (AndI (XorI src1 -1) src2).  Guarded by UseBMI1Instructions.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matches src & -src as (AndI (SubI 0 src) src).
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to (and including) lowest set bit, matches
// src ^ (src - 1) as (XorI (AddI src -1) src).
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, matches src & (src - 1) as
// (AndI (AddI src -1) src).
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst
(OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// OR of an int register with a pointer reinterpreted as bits (CastP2X).
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
// Read-modify-write form: StoreI of OrI over a LoadI of the same address.
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// The three rules below are pure encoding helpers (no match rule); they are
// instantiated by the expand %{ %} blocks of the matching ROL rules further
// down.
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

// Variable rotate: count in CL; dst must not be ECX (ncxRegI).
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
// Recognizes (x << 1) | (x >>> -1) as a rotate-left-by-one.
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate requires the two shift counts to sum to 0 mod 32, i.e. the
// OR of the two shifts really is a rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift)
(URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
// Encoding-only helper rules (no match rule), instantiated by the expand
// blocks of the matching ROR rules below.
instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

// Variable rotate: count in CL; dst must not be ECX (ncxRegI).
instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Predicate requires the two shift counts to sum to 0 mod 32 (true rotate).
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// x ^ -1 is bitwise NOT; the F7 /2 NOT encoding leaves EFLAGS untouched,
// hence no eFlagsReg effect here.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06); /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
// Read-modify-write form: StoreI of XorI over a LoadI of the same address.
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set
dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6); /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Encoding-only helper (no match rule): plain register copy, used by the
// convI2B/convP2B expand rules below.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Encoding-only helper: NEG dst sets carry iff dst != 0, then
// ADC dst,src folds the carry back in, yielding a nonzero/zero "boolean".
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B of an int: expands to copy + NEG/ADC sequence above.
instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer flavor of movI_nocopy (source is eRegP).
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer flavor of ci2b.
instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B of a pointer: expands to copy + NEG/ADC.
instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// CmpLTMask: dst = (p < q) ? -1 : 0, computed branch-free with
// XOR/CMP/SETlt/NEG.  dst is constrained to ECX because SETcc can only
// target the low byte of EAX/EBX/ECX/EDX, and p/q must avoid ECX (ncxRegI).
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // NOTE: a previously-declared 'Label done;' was removed here — this
    // sequence is branch-free and never bound or jumped to it (unlike
    // cadd_cmpLTMask / and_cmpLTMask below, which do use their label).
    __ xorl(Rd, Rd);                    // dst = 0 (flags clobbered before CMP)
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);       // dst = (p < q) ? 1 : 0
    __ negl(Rd);                        // dst = (p < q) ? -1 : 0
  %}

  ins_pipe(pipe_slow);
%}

// CmpLTMask against zero: a single arithmetic shift replicates the sign
// bit through the register (dst = dst < 0 ? -1 : 0).
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// p = (p - q) + ((p < q) ? y : 0), matched from the masked-add idiom.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);                          // p -= q; sets SF/OF for the jcc
    __ jccb(Assembler::greaterEqual, done);   // mask was 0 -> nothing to add
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// y = (p < q) ? y : 0, matched from the mask-and idiom.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);   // mask is all-ones -> keep y
    __ xorl(Ry, Ry);                  // mask is zero -> y = 0
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These rules produce only the condition codes (eFlagsReg) consumed by the
// overflow-checking Math.*Exact intrinsics; ADD/IMUL forms destroy op1
// (USE_KILL), the CMP forms leave both inputs intact.

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtraction overflow: CMP sets the same flags as SUB without writing op1.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Negation overflow (0 - op2): NEG sets OF when op2 == min_jint.
instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Three-operand IMUL into a TEMP so neither input is destroyed.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Integer Absolute Instructions
// Branch-free abs: tmp = sign mask (src >> 31); dst = (src ^ tmp) - tmp.
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
  %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);            // tmp = 0 or -1 (sign mask)
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);
    __ subl($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

//----------Long Instructions------------------------------------------------
// 64-bit values live in register pairs ($dst.lo / $dst.hi); adds use
// ADD/ADC so the carry propagates from the low to the high word.
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2
*/
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
// Low word at $mem, high word at $mem+4 (little-endian pair).
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
// SUB/SBB so the borrow propagates from the low to the high word.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Long negation (0 - dst), via the neg_long encoding class.
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
// No carry in AND, so low and high words are processed independently.
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Long ANDN: (~src1) & src2, one ANDNL per 32-bit half of the pair.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2,
immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // High word of the 64-bit operand: same address + 4.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSI (src & -src): the BLSIL of the low word sets ZF when the low
// word was zero; only then can the lowest set bit lie in the high word,
// so the high-word BLSIL is skipped otherwise (JNZ done).
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // High word of the 64-bit operand: same address + 4.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSMSK (src ^ (src - 1)): BLSMSKL of the low word sets CF when the
// low word was zero; only then does the mask extend into the high word
// (JNC done skips the high-word op).
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // High word of the 64-bit operand: same address + 4.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Long BLSR (src & (src - 1)) — rule continues past this chunk.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
9134 "BLSRL $dst.lo, $src.lo\n\t" 9135 "JNC done\n\t" 9136 "BLSRL $dst.hi, $src.hi\n" 9137 "done:" 9138 %} 9139 9140 ins_encode %{ 9141 Label done; 9142 Register Rdst = $dst$$Register; 9143 Register Rsrc = $src$$Register; 9144 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9145 __ blsrl(Rdst, Rsrc); 9146 __ jccb(Assembler::carryClear, done); 9147 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9148 __ bind(done); 9149 %} 9150 9151 ins_pipe(ialu_reg); 9152 %} 9153 9154 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9155 %{ 9156 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); 9157 predicate(UseBMI1Instructions); 9158 effect(KILL cr, TEMP dst); 9159 9160 ins_cost(125); 9161 format %{ "MOVL $dst.hi, $src+4\n\t" 9162 "BLSRL $dst.lo, $src\n\t" 9163 "JNC done\n\t" 9164 "BLSRL $dst.hi, $src+4\n" 9165 "done:" 9166 %} 9167 9168 ins_encode %{ 9169 Label done; 9170 Register Rdst = $dst$$Register; 9171 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9172 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9173 __ blsrl(Rdst, $src$$Address); 9174 __ jccb(Assembler::carryClear, done); 9175 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9176 __ bind(done); 9177 %} 9178 9179 ins_pipe(ialu_reg_mem); 9180 %} 9181 9182 // Or Long Register with Register 9183 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9184 match(Set dst (OrL dst src)); 9185 effect(KILL cr); 9186 format %{ "OR $dst.lo,$src.lo\n\t" 9187 "OR $dst.hi,$src.hi" %} 9188 opcode(0x0B,0x0B); 9189 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9190 ins_pipe( ialu_reg_reg_long ); 9191 %} 9192 9193 // Or Long Register with Immediate 9194 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9195 match(Set dst (OrL dst src)); 9196 effect(KILL cr); 9197 format %{ "OR $dst.lo,$src.lo\n\t" 9198 "OR $dst.hi,$src.hi" %} 9199 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9200 ins_encode( Long_OpcSErm_Lo( dst, src ), 
Long_OpcSErm_Hi( dst, src ) ); 9201 ins_pipe( ialu_reg_long ); 9202 %} 9203 9204 // Or Long Register with Memory 9205 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9206 match(Set dst (OrL dst (LoadL mem))); 9207 effect(KILL cr); 9208 ins_cost(125); 9209 format %{ "OR $dst.lo,$mem\n\t" 9210 "OR $dst.hi,$mem+4" %} 9211 opcode(0x0B,0x0B); 9212 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9213 ins_pipe( ialu_reg_long_mem ); 9214 %} 9215 9216 // Xor Long Register with Register 9217 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9218 match(Set dst (XorL dst src)); 9219 effect(KILL cr); 9220 format %{ "XOR $dst.lo,$src.lo\n\t" 9221 "XOR $dst.hi,$src.hi" %} 9222 opcode(0x33,0x33); 9223 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9224 ins_pipe( ialu_reg_reg_long ); 9225 %} 9226 9227 // Xor Long Register with Immediate -1 9228 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9229 match(Set dst (XorL dst imm)); 9230 format %{ "NOT $dst.lo\n\t" 9231 "NOT $dst.hi" %} 9232 ins_encode %{ 9233 __ notl($dst$$Register); 9234 __ notl(HIGH_FROM_LOW($dst$$Register)); 9235 %} 9236 ins_pipe( ialu_reg_long ); 9237 %} 9238 9239 // Xor Long Register with Immediate 9240 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9241 match(Set dst (XorL dst src)); 9242 effect(KILL cr); 9243 format %{ "XOR $dst.lo,$src.lo\n\t" 9244 "XOR $dst.hi,$src.hi" %} 9245 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9246 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9247 ins_pipe( ialu_reg_long ); 9248 %} 9249 9250 // Xor Long Register with Memory 9251 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9252 match(Set dst (XorL dst (LoadL mem))); 9253 effect(KILL cr); 9254 ins_cost(125); 9255 format %{ "XOR $dst.lo,$mem\n\t" 9256 "XOR $dst.hi,$mem+4" %} 9257 opcode(0x33,0x33); 9258 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9259 ins_pipe( ialu_reg_long_mem ); 
9260 %} 9261 9262 // Shift Left Long by 1 9263 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9264 predicate(UseNewLongLShift); 9265 match(Set dst (LShiftL dst cnt)); 9266 effect(KILL cr); 9267 ins_cost(100); 9268 format %{ "ADD $dst.lo,$dst.lo\n\t" 9269 "ADC $dst.hi,$dst.hi" %} 9270 ins_encode %{ 9271 __ addl($dst$$Register,$dst$$Register); 9272 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9273 %} 9274 ins_pipe( ialu_reg_long ); 9275 %} 9276 9277 // Shift Left Long by 2 9278 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9279 predicate(UseNewLongLShift); 9280 match(Set dst (LShiftL dst cnt)); 9281 effect(KILL cr); 9282 ins_cost(100); 9283 format %{ "ADD $dst.lo,$dst.lo\n\t" 9284 "ADC $dst.hi,$dst.hi\n\t" 9285 "ADD $dst.lo,$dst.lo\n\t" 9286 "ADC $dst.hi,$dst.hi" %} 9287 ins_encode %{ 9288 __ addl($dst$$Register,$dst$$Register); 9289 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9290 __ addl($dst$$Register,$dst$$Register); 9291 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9292 %} 9293 ins_pipe( ialu_reg_long ); 9294 %} 9295 9296 // Shift Left Long by 3 9297 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9298 predicate(UseNewLongLShift); 9299 match(Set dst (LShiftL dst cnt)); 9300 effect(KILL cr); 9301 ins_cost(100); 9302 format %{ "ADD $dst.lo,$dst.lo\n\t" 9303 "ADC $dst.hi,$dst.hi\n\t" 9304 "ADD $dst.lo,$dst.lo\n\t" 9305 "ADC $dst.hi,$dst.hi\n\t" 9306 "ADD $dst.lo,$dst.lo\n\t" 9307 "ADC $dst.hi,$dst.hi" %} 9308 ins_encode %{ 9309 __ addl($dst$$Register,$dst$$Register); 9310 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9311 __ addl($dst$$Register,$dst$$Register); 9312 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9313 __ addl($dst$$Register,$dst$$Register); 9314 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9315 %} 9316 ins_pipe( ialu_reg_long ); 9317 %} 9318 9319 // Shift Left 
Long by 1-31 9320 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9321 match(Set dst (LShiftL dst cnt)); 9322 effect(KILL cr); 9323 ins_cost(200); 9324 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9325 "SHL $dst.lo,$cnt" %} 9326 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9327 ins_encode( move_long_small_shift(dst,cnt) ); 9328 ins_pipe( ialu_reg_long ); 9329 %} 9330 9331 // Shift Left Long by 32-63 9332 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9333 match(Set dst (LShiftL dst cnt)); 9334 effect(KILL cr); 9335 ins_cost(300); 9336 format %{ "MOV $dst.hi,$dst.lo\n" 9337 "\tSHL $dst.hi,$cnt-32\n" 9338 "\tXOR $dst.lo,$dst.lo" %} 9339 opcode(0xC1, 0x4); /* C1 /4 ib */ 9340 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9341 ins_pipe( ialu_reg_long ); 9342 %} 9343 9344 // Shift Left Long by variable 9345 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9346 match(Set dst (LShiftL dst shift)); 9347 effect(KILL cr); 9348 ins_cost(500+200); 9349 size(17); 9350 format %{ "TEST $shift,32\n\t" 9351 "JEQ,s small\n\t" 9352 "MOV $dst.hi,$dst.lo\n\t" 9353 "XOR $dst.lo,$dst.lo\n" 9354 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9355 "SHL $dst.lo,$shift" %} 9356 ins_encode( shift_left_long( dst, shift ) ); 9357 ins_pipe( pipe_slow ); 9358 %} 9359 9360 // Shift Right Long by 1-31 9361 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9362 match(Set dst (URShiftL dst cnt)); 9363 effect(KILL cr); 9364 ins_cost(200); 9365 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9366 "SHR $dst.hi,$cnt" %} 9367 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9368 ins_encode( move_long_small_shift(dst,cnt) ); 9369 ins_pipe( ialu_reg_long ); 9370 %} 9371 9372 // Shift Right Long by 32-63 9373 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9374 match(Set dst (URShiftL dst cnt)); 9375 effect(KILL cr); 9376 ins_cost(300); 9377 format %{ "MOV $dst.lo,$dst.hi\n" 9378 "\tSHR $dst.lo,$cnt-32\n" 9379 "\tXOR 
$dst.hi,$dst.hi" %} 9380 opcode(0xC1, 0x5); /* C1 /5 ib */ 9381 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9382 ins_pipe( ialu_reg_long ); 9383 %} 9384 9385 // Shift Right Long by variable 9386 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9387 match(Set dst (URShiftL dst shift)); 9388 effect(KILL cr); 9389 ins_cost(600); 9390 size(17); 9391 format %{ "TEST $shift,32\n\t" 9392 "JEQ,s small\n\t" 9393 "MOV $dst.lo,$dst.hi\n\t" 9394 "XOR $dst.hi,$dst.hi\n" 9395 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9396 "SHR $dst.hi,$shift" %} 9397 ins_encode( shift_right_long( dst, shift ) ); 9398 ins_pipe( pipe_slow ); 9399 %} 9400 9401 // Shift Right Long by 1-31 9402 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9403 match(Set dst (RShiftL dst cnt)); 9404 effect(KILL cr); 9405 ins_cost(200); 9406 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9407 "SAR $dst.hi,$cnt" %} 9408 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9409 ins_encode( move_long_small_shift(dst,cnt) ); 9410 ins_pipe( ialu_reg_long ); 9411 %} 9412 9413 // Shift Right Long by 32-63 9414 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9415 match(Set dst (RShiftL dst cnt)); 9416 effect(KILL cr); 9417 ins_cost(300); 9418 format %{ "MOV $dst.lo,$dst.hi\n" 9419 "\tSAR $dst.lo,$cnt-32\n" 9420 "\tSAR $dst.hi,31" %} 9421 opcode(0xC1, 0x7); /* C1 /7 ib */ 9422 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9423 ins_pipe( ialu_reg_long ); 9424 %} 9425 9426 // Shift Right arithmetic Long by variable 9427 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9428 match(Set dst (RShiftL dst shift)); 9429 effect(KILL cr); 9430 ins_cost(600); 9431 size(18); 9432 format %{ "TEST $shift,32\n\t" 9433 "JEQ,s small\n\t" 9434 "MOV $dst.lo,$dst.hi\n\t" 9435 "SAR $dst.hi,31\n" 9436 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9437 "SAR $dst.hi,$shift" %} 9438 ins_encode( shift_right_arith_long( dst, shift ) ); 9439 ins_pipe( pipe_slow ); 9440 %} 9441 9442 
9443 //----------Double Instructions------------------------------------------------ 9444 // Double Math 9445 9446 // Compare & branch 9447 9448 // P6 version of float compare, sets condition codes in EFLAGS 9449 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9450 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9451 match(Set cr (CmpD src1 src2)); 9452 effect(KILL rax); 9453 ins_cost(150); 9454 format %{ "FLD $src1\n\t" 9455 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9456 "JNP exit\n\t" 9457 "MOV ah,1 // saw a NaN, set CF\n\t" 9458 "SAHF\n" 9459 "exit:\tNOP // avoid branch to branch" %} 9460 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9461 ins_encode( Push_Reg_DPR(src1), 9462 OpcP, RegOpc(src2), 9463 cmpF_P6_fixup ); 9464 ins_pipe( pipe_slow ); 9465 %} 9466 9467 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9468 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9469 match(Set cr (CmpD src1 src2)); 9470 ins_cost(150); 9471 format %{ "FLD $src1\n\t" 9472 "FUCOMIP ST,$src2 // P6 instruction" %} 9473 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9474 ins_encode( Push_Reg_DPR(src1), 9475 OpcP, RegOpc(src2)); 9476 ins_pipe( pipe_slow ); 9477 %} 9478 9479 // Compare & branch 9480 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9481 predicate(UseSSE<=1); 9482 match(Set cr (CmpD src1 src2)); 9483 effect(KILL rax); 9484 ins_cost(200); 9485 format %{ "FLD $src1\n\t" 9486 "FCOMp $src2\n\t" 9487 "FNSTSW AX\n\t" 9488 "TEST AX,0x400\n\t" 9489 "JZ,s flags\n\t" 9490 "MOV AH,1\t# unordered treat as LT\n" 9491 "flags:\tSAHF" %} 9492 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9493 ins_encode( Push_Reg_DPR(src1), 9494 OpcP, RegOpc(src2), 9495 fpu_flags); 9496 ins_pipe( pipe_slow ); 9497 %} 9498 9499 // Compare vs zero into -1,0,1 9500 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9501 predicate(UseSSE<=1); 9502 match(Set dst (CmpD3 src1 zero)); 9503 effect(KILL 
cr, KILL rax); 9504 ins_cost(280); 9505 format %{ "FTSTD $dst,$src1" %} 9506 opcode(0xE4, 0xD9); 9507 ins_encode( Push_Reg_DPR(src1), 9508 OpcS, OpcP, PopFPU, 9509 CmpF_Result(dst)); 9510 ins_pipe( pipe_slow ); 9511 %} 9512 9513 // Compare into -1,0,1 9514 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9515 predicate(UseSSE<=1); 9516 match(Set dst (CmpD3 src1 src2)); 9517 effect(KILL cr, KILL rax); 9518 ins_cost(300); 9519 format %{ "FCMPD $dst,$src1,$src2" %} 9520 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9521 ins_encode( Push_Reg_DPR(src1), 9522 OpcP, RegOpc(src2), 9523 CmpF_Result(dst)); 9524 ins_pipe( pipe_slow ); 9525 %} 9526 9527 // float compare and set condition codes in EFLAGS by XMM regs 9528 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9529 predicate(UseSSE>=2); 9530 match(Set cr (CmpD src1 src2)); 9531 ins_cost(145); 9532 format %{ "UCOMISD $src1,$src2\n\t" 9533 "JNP,s exit\n\t" 9534 "PUSHF\t# saw NaN, set CF\n\t" 9535 "AND [rsp], #0xffffff2b\n\t" 9536 "POPF\n" 9537 "exit:" %} 9538 ins_encode %{ 9539 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9540 emit_cmpfp_fixup(_masm); 9541 %} 9542 ins_pipe( pipe_slow ); 9543 %} 9544 9545 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9546 predicate(UseSSE>=2); 9547 match(Set cr (CmpD src1 src2)); 9548 ins_cost(100); 9549 format %{ "UCOMISD $src1,$src2" %} 9550 ins_encode %{ 9551 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9552 %} 9553 ins_pipe( pipe_slow ); 9554 %} 9555 9556 // float compare and set condition codes in EFLAGS by XMM regs 9557 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9558 predicate(UseSSE>=2); 9559 match(Set cr (CmpD src1 (LoadD src2))); 9560 ins_cost(145); 9561 format %{ "UCOMISD $src1,$src2\n\t" 9562 "JNP,s exit\n\t" 9563 "PUSHF\t# saw NaN, set CF\n\t" 9564 "AND [rsp], #0xffffff2b\n\t" 9565 "POPF\n" 9566 "exit:" %} 9567 ins_encode %{ 9568 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9569 
emit_cmpfp_fixup(_masm); 9570 %} 9571 ins_pipe( pipe_slow ); 9572 %} 9573 9574 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9575 predicate(UseSSE>=2); 9576 match(Set cr (CmpD src1 (LoadD src2))); 9577 ins_cost(100); 9578 format %{ "UCOMISD $src1,$src2" %} 9579 ins_encode %{ 9580 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9581 %} 9582 ins_pipe( pipe_slow ); 9583 %} 9584 9585 // Compare into -1,0,1 in XMM 9586 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9587 predicate(UseSSE>=2); 9588 match(Set dst (CmpD3 src1 src2)); 9589 effect(KILL cr); 9590 ins_cost(255); 9591 format %{ "UCOMISD $src1, $src2\n\t" 9592 "MOV $dst, #-1\n\t" 9593 "JP,s done\n\t" 9594 "JB,s done\n\t" 9595 "SETNE $dst\n\t" 9596 "MOVZB $dst, $dst\n" 9597 "done:" %} 9598 ins_encode %{ 9599 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9600 emit_cmpfp3(_masm, $dst$$Register); 9601 %} 9602 ins_pipe( pipe_slow ); 9603 %} 9604 9605 // Compare into -1,0,1 in XMM and memory 9606 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9607 predicate(UseSSE>=2); 9608 match(Set dst (CmpD3 src1 (LoadD src2))); 9609 effect(KILL cr); 9610 ins_cost(275); 9611 format %{ "UCOMISD $src1, $src2\n\t" 9612 "MOV $dst, #-1\n\t" 9613 "JP,s done\n\t" 9614 "JB,s done\n\t" 9615 "SETNE $dst\n\t" 9616 "MOVZB $dst, $dst\n" 9617 "done:" %} 9618 ins_encode %{ 9619 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9620 emit_cmpfp3(_masm, $dst$$Register); 9621 %} 9622 ins_pipe( pipe_slow ); 9623 %} 9624 9625 9626 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9627 predicate (UseSSE <=1); 9628 match(Set dst (SubD dst src)); 9629 9630 format %{ "FLD $src\n\t" 9631 "DSUBp $dst,ST" %} 9632 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9633 ins_cost(150); 9634 ins_encode( Push_Reg_DPR(src), 9635 OpcP, RegOpc(dst) ); 9636 ins_pipe( fpu_reg_reg ); 9637 %} 9638 9639 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9640 predicate (UseSSE <=1); 9641 
match(Set dst (RoundDouble (SubD src1 src2))); 9642 ins_cost(250); 9643 9644 format %{ "FLD $src2\n\t" 9645 "DSUB ST,$src1\n\t" 9646 "FSTP_D $dst\t# D-round" %} 9647 opcode(0xD8, 0x5); 9648 ins_encode( Push_Reg_DPR(src2), 9649 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9650 ins_pipe( fpu_mem_reg_reg ); 9651 %} 9652 9653 9654 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9655 predicate (UseSSE <=1); 9656 match(Set dst (SubD dst (LoadD src))); 9657 ins_cost(150); 9658 9659 format %{ "FLD $src\n\t" 9660 "DSUBp $dst,ST" %} 9661 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9662 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9663 OpcP, RegOpc(dst) ); 9664 ins_pipe( fpu_reg_mem ); 9665 %} 9666 9667 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9668 predicate (UseSSE<=1); 9669 match(Set dst (AbsD src)); 9670 ins_cost(100); 9671 format %{ "FABS" %} 9672 opcode(0xE1, 0xD9); 9673 ins_encode( OpcS, OpcP ); 9674 ins_pipe( fpu_reg_reg ); 9675 %} 9676 9677 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9678 predicate(UseSSE<=1); 9679 match(Set dst (NegD src)); 9680 ins_cost(100); 9681 format %{ "FCHS" %} 9682 opcode(0xE0, 0xD9); 9683 ins_encode( OpcS, OpcP ); 9684 ins_pipe( fpu_reg_reg ); 9685 %} 9686 9687 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9688 predicate(UseSSE<=1); 9689 match(Set dst (AddD dst src)); 9690 format %{ "FLD $src\n\t" 9691 "DADD $dst,ST" %} 9692 size(4); 9693 ins_cost(150); 9694 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9695 ins_encode( Push_Reg_DPR(src), 9696 OpcP, RegOpc(dst) ); 9697 ins_pipe( fpu_reg_reg ); 9698 %} 9699 9700 9701 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9702 predicate(UseSSE<=1); 9703 match(Set dst (RoundDouble (AddD src1 src2))); 9704 ins_cost(250); 9705 9706 format %{ "FLD $src2\n\t" 9707 "DADD ST,$src1\n\t" 9708 "FSTP_D $dst\t# D-round" %} 9709 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9710 ins_encode( Push_Reg_DPR(src2), 9711 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9712 ins_pipe( 
fpu_mem_reg_reg ); 9713 %} 9714 9715 9716 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9717 predicate(UseSSE<=1); 9718 match(Set dst (AddD dst (LoadD src))); 9719 ins_cost(150); 9720 9721 format %{ "FLD $src\n\t" 9722 "DADDp $dst,ST" %} 9723 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9724 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9725 OpcP, RegOpc(dst) ); 9726 ins_pipe( fpu_reg_mem ); 9727 %} 9728 9729 // add-to-memory 9730 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9731 predicate(UseSSE<=1); 9732 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9733 ins_cost(150); 9734 9735 format %{ "FLD_D $dst\n\t" 9736 "DADD ST,$src\n\t" 9737 "FST_D $dst" %} 9738 opcode(0xDD, 0x0); 9739 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9740 Opcode(0xD8), RegOpc(src), 9741 set_instruction_start, 9742 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9743 ins_pipe( fpu_reg_mem ); 9744 %} 9745 9746 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9747 predicate(UseSSE<=1); 9748 match(Set dst (AddD dst con)); 9749 ins_cost(125); 9750 format %{ "FLD1\n\t" 9751 "DADDp $dst,ST" %} 9752 ins_encode %{ 9753 __ fld1(); 9754 __ faddp($dst$$reg); 9755 %} 9756 ins_pipe(fpu_reg); 9757 %} 9758 9759 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9760 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9761 match(Set dst (AddD dst con)); 9762 ins_cost(200); 9763 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9764 "DADDp $dst,ST" %} 9765 ins_encode %{ 9766 __ fld_d($constantaddress($con)); 9767 __ faddp($dst$$reg); 9768 %} 9769 ins_pipe(fpu_reg_mem); 9770 %} 9771 9772 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9773 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9774 match(Set dst (RoundDouble (AddD src con))); 9775 ins_cost(200); 9776 format %{ "FLD_D [$constantaddress]\t# load from 
constant table: double=$con\n\t" 9777 "DADD ST,$src\n\t" 9778 "FSTP_D $dst\t# D-round" %} 9779 ins_encode %{ 9780 __ fld_d($constantaddress($con)); 9781 __ fadd($src$$reg); 9782 __ fstp_d(Address(rsp, $dst$$disp)); 9783 %} 9784 ins_pipe(fpu_mem_reg_con); 9785 %} 9786 9787 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9788 predicate(UseSSE<=1); 9789 match(Set dst (MulD dst src)); 9790 format %{ "FLD $src\n\t" 9791 "DMULp $dst,ST" %} 9792 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9793 ins_cost(150); 9794 ins_encode( Push_Reg_DPR(src), 9795 OpcP, RegOpc(dst) ); 9796 ins_pipe( fpu_reg_reg ); 9797 %} 9798 9799 // Strict FP instruction biases argument before multiply then 9800 // biases result to avoid double rounding of subnormals. 9801 // 9802 // scale arg1 by multiplying arg1 by 2^(-15360) 9803 // load arg2 9804 // multiply scaled arg1 by arg2 9805 // rescale product by 2^(15360) 9806 // 9807 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9808 predicate( UseSSE<=1 && Compile::current()->has_method() ); 9809 match(Set dst (MulD dst src)); 9810 ins_cost(1); // Select this instruction for all FP double multiplies 9811 9812 format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t" 9813 "DMULp $dst,ST\n\t" 9814 "FLD $src\n\t" 9815 "DMULp $dst,ST\n\t" 9816 "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t" 9817 "DMULp $dst,ST\n\t" %} 9818 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9819 ins_encode( strictfp_bias1(dst), 9820 Push_Reg_DPR(src), 9821 OpcP, RegOpc(dst), 9822 strictfp_bias2(dst) ); 9823 ins_pipe( fpu_reg_reg ); 9824 %} 9825 9826 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9827 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9828 match(Set dst (MulD dst con)); 9829 ins_cost(200); 9830 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9831 "DMULp $dst,ST" %} 9832 ins_encode %{ 9833 __ fld_d($constantaddress($con)); 9834 __ fmulp($dst$$reg); 9835 %} 9836 
ins_pipe(fpu_reg_mem); 9837 %} 9838 9839 9840 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9841 predicate( UseSSE<=1 ); 9842 match(Set dst (MulD dst (LoadD src))); 9843 ins_cost(200); 9844 format %{ "FLD_D $src\n\t" 9845 "DMULp $dst,ST" %} 9846 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9847 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9848 OpcP, RegOpc(dst) ); 9849 ins_pipe( fpu_reg_mem ); 9850 %} 9851 9852 // 9853 // Cisc-alternate to reg-reg multiply 9854 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ 9855 predicate( UseSSE<=1 ); 9856 match(Set dst (MulD src (LoadD mem))); 9857 ins_cost(250); 9858 format %{ "FLD_D $mem\n\t" 9859 "DMUL ST,$src\n\t" 9860 "FSTP_D $dst" %} 9861 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9862 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9863 OpcReg_FPR(src), 9864 Pop_Reg_DPR(dst) ); 9865 ins_pipe( fpu_reg_reg_mem ); 9866 %} 9867 9868 9869 // MACRO3 -- addDPR a mulDPR 9870 // This instruction is a '2-address' instruction in that the result goes 9871 // back to src2. This eliminates a move from the macro; possibly the 9872 // register allocator will have to add it back (and maybe not). 
// Fused multiply-add macro: src2 = (src0 * src1) + src2, all on the x87 stack.
// Result is written back into src2 (2-address form), saving a register move.
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );                      // x87 path only; SSE2 handles doubles otherwise
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DADDp  $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),              // push src0 onto FP stack
              FMul_ST_reg(src1),               // ST = ST * src1
              FAddP_reg_ST(src2) );            // src2 += ST, pop
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
// Fused multiply-subtract macro: src2 = (src0 * src1) - src2 (reverse-subtract
// encoding DE E0+i, DSUBRp), result written back into src2.
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2)); // DE E0+i: FSUBRP src2, ST
  ins_pipe( fpu_reg_reg_reg );
%}


// Plain (non-strictfp) x87 double divide: dst = dst / src.
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD    $src\n\t"
            "FDIVp  $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
// Strict-FP x87 double divide: dst = dst / src with subnormal-bias fixup.
// Fix: the original declared predicate twice — a redundant
// "predicate (UseSSE<=1);" followed by the full clause; the first is subsumed
// by the second and has been removed. ins_cost(01) (octal-style literal) is
// normalized to ins_cost(1) to match strictfp_mulDPR_reg, which also uses a
// tiny cost so this form is selected for all strict-FP double divides.
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all strict-FP double divides

  format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "FDIVp  $dst,ST\n\t"
            "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),     // pre-scale dividend by 2^(-15360)
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );   // rescale quotient by 2^(15360)
  ins_pipe( fpu_reg_reg );
%}

// x87 double remainder: dst = dst fmod src.
// emitModDPR() loops on FPREM, so EAX (FNSTSW) and EFLAGS are clobbered.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD   $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE2 double remainder: bounce operands through the stack into the x87 unit
// (FPREM has no SSE equivalent), then move the result back to an XMM register.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB    ESP,8\t # DMOD\n"
          "\tMOVSD  [ESP+0],$src1\n"
          "\tFLD_D  [ESP+0]\n"
          "\tMOVSD  [ESP+0],$src0\n"
          "\tFLD_D  [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP     loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD  $dst,[ESP+0]\n"
          "\tADD    ESP,8\n"
          "\tFSTP   ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

instruct atanDPR_reg(regDPR dst,
regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

// SSE2 variant of AtanD: sources/results live in XMM registers but the
// computation still goes through the x87 stack via Push_SrcD/Push_ResultD.
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}

//-------------Float Instructions-------------------------------
// Float Math

// Code for float compare:
//     fcompp();
//     fwait(); fnstsw_ax();
//     sahf();
//     movl(dst, unordered_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);
//   exit:

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1,
                      regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!

instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // FPR1 (top of x87 stack) can be stored directly; any other stack
    // slot must first be loaded to the top, then popped to memory.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister,
$src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // 0x80000000 is the CVTTSD2SI "integer indefinite" result, returned
    // for NaN and out-of-range inputs; fall into the wrapper to fix up.
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp,
0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // 0x80000000 flags NaN/overflow from CVTTSS2SI; slow path fixes it up.
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
11017 ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); 11018 ins_pipe( fpu_reg_mem ); 11019 %} 11020 11021 instruct convI2D_reg(regD dst, rRegI src) %{ 11022 predicate( UseSSE>=2 && !UseXmmI2D ); 11023 match(Set dst (ConvI2D src)); 11024 format %{ "CVTSI2SD $dst,$src" %} 11025 ins_encode %{ 11026 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); 11027 %} 11028 ins_pipe( pipe_slow ); 11029 %} 11030 11031 instruct convI2D_mem(regD dst, memory mem) %{ 11032 predicate( UseSSE>=2 ); 11033 match(Set dst (ConvI2D (LoadI mem))); 11034 format %{ "CVTSI2SD $dst,$mem" %} 11035 ins_encode %{ 11036 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); 11037 %} 11038 ins_pipe( pipe_slow ); 11039 %} 11040 11041 instruct convXI2D_reg(regD dst, rRegI src) 11042 %{ 11043 predicate( UseSSE>=2 && UseXmmI2D ); 11044 match(Set dst (ConvI2D src)); 11045 11046 format %{ "MOVD $dst,$src\n\t" 11047 "CVTDQ2PD $dst,$dst\t# i2d" %} 11048 ins_encode %{ 11049 __ movdl($dst$$XMMRegister, $src$$Register); 11050 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); 11051 %} 11052 ins_pipe(pipe_slow); // XXX 11053 %} 11054 11055 instruct convI2DPR_mem(regDPR dst, memory mem) %{ 11056 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); 11057 match(Set dst (ConvI2D (LoadI mem))); 11058 format %{ "FILD $mem\n\t" 11059 "FSTP $dst" %} 11060 opcode(0xDB); /* DB /0 */ 11061 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11062 Pop_Reg_DPR(dst)); 11063 ins_pipe( fpu_reg_mem ); 11064 %} 11065 11066 // Convert a byte to a float; no rounding step needed. 
// Convert a masked byte-range int (x & 255) to an x87 float; the value is
// exactly representable, so no explicit rounding step is needed.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F ));
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Int-to-float staying in the XMM integer domain (MOVD + CVTDQ2PS).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend convert int to long (register pair): copy, then arithmetic
// shift of the high word replicates the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Convert long to x87 double through memory (no SSE2).
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Convert long to XMM double: x87 FILD/FSTP round-trips through the stack,
// then the result is loaded into the XMM register.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Convert long to XMM float, analogous to convL2D_reg.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Convert long to float into a stack slot via x87 (no SSE requirement).
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Truncate long to int: just take the low word of the register pair.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Reinterpret float bits as int: plain 32-bit load from the stack slot.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// Reinterpret x87 float bits as int by storing to a stack slot.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Reinterpret XMM float bits as int by storing to a stack slot.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret XMM float bits as int directly, register to register.
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret int bits as float: plain 32-bit store to the stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// Reinterpret int bits (stack slot) as an x87 float.
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Reinterpret int bits (stack slot) as an XMM float.
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret int bits as an XMM float directly, register to register.
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret double bits (stack slot) as a long register pair.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// Reinterpret x87 double bits as a long by storing to a stack slot.
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Reinterpret XMM double bits as a long by storing to a stack slot.
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret XMM double bits as a long register pair: low word via MOVD,
// high word via a lane shuffle into tmp followed by MOVD.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret a long register pair as double bits in a stack slot.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// Reinterpret long bits (stack slot) as an x87 double.
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// Reinterpret long bits (stack slot) as an XMM double, clearing the
// upper XMM lanes (MOVSD).
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Same as above, but for CPUs where the partial-register MOVLPD form is
// preferable (upper lanes left unchanged).
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret a long register pair as an XMM double: MOVD each half and
// interleave with PUNPCKLDQ.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//----------------------------- CompressBits/ExpandBits ------------------------

instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src mask));
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    Label exit, partial_result;
    // Extract the upper and lower 32 bits of the source into the destination
    // register pair in parallel.  Merge the results of the upper and lower
    // destination registers such that the upper destination results are
    // contiguously laid out after the lower destination result.
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
    __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
    __ popcntl($rtmp$$Register, $mask$$Register);
    // Skip merging if bit count of lower mask register is equal to 32 (register size).
    __ cmpl($rtmp$$Register, 32);
    __ jccb(Assembler::equal, exit);
    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift left the contents of upper destination register by true bit count of lower mask register
    // and merge with lower destination register.
    __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ orl($dst$$Register, $rtmp$$Register);
    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    // Zero out upper destination register if true bit count of lower 32 bit mask is zero
    // since contents of upper destination have already been copied to lower destination
    // register.
    __ cmpl($rtmp$$Register, 0);
    __ jccb(Assembler::greater, partial_result);
    __ movl(HIGH_FROM_LOW($dst$$Register), 0);
    __ jmp(exit);
    __ bind(partial_result);
    // Perform right shift over upper destination register to move out bits already copied
    // to lower destination register.
    __ subl($rtmp$$Register, 32);
    __ negl($rtmp$$Register);
    __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ bind(exit);
  %}
  ins_pipe( pipe_slow );
%}

instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src mask));
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    // Extraction operation sequentially reads the bits from source register starting from LSB
    // and lays them out into destination register at bit locations corresponding to true bits
    // in mask register. Thus number of source bits read are equal to combined true bit count
    // of mask register pair.
    Label exit, mask_clipping;
    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
    __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
    __ popcntl($rtmp$$Register, $mask$$Register);
    // If true bit count of lower mask register is 32 then none of bit of lower source register
    // will feed to upper destination register.
    __ cmpl($rtmp$$Register, 32);
    __ jccb(Assembler::equal, exit);
    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift right the contents of lower source register to remove already consumed bits.
    __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
    // Extract the bits from lower source register starting from LSB under the influence
    // of upper mask register.
    __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    __ subl($rtmp$$Register, 32);
    __ negl($rtmp$$Register);
    __ movdl($xtmp$$XMMRegister, $mask$$Register);
    __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
    // Clear the set bits in upper mask register which have been used to extract the contents
    // from lower source register.
    __ bind(mask_clipping);
    __ blsrl($mask$$Register, $mask$$Register);
    __ decrementl($rtmp$$Register, 1);
    __ jccb(Assembler::greater, mask_clipping);
    // Starting from LSB extract the bits from upper source register under the influence of
    // remaining set bits in upper mask register.
    __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
    // Merge the partial results extracted from lower and upper source register bits.
    __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ movdl($mask$$Register, $xtmp$$XMMRegister);
    __ bind(exit);
  %}
  ins_pipe( pipe_slow );
%}

// =======================================================================
// fast clearing of an array
// Small ClearArray non-AVX512.
11567 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11568 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2)); 11569 match(Set dummy (ClearArray cnt base)); 11570 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); 11571 11572 format %{ $$template 11573 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11574 $$emit$$"CMP InitArrayShortSize,rcx\n\t" 11575 $$emit$$"JG LARGE\n\t" 11576 $$emit$$"SHL ECX, 1\n\t" 11577 $$emit$$"DEC ECX\n\t" 11578 $$emit$$"JS DONE\t# Zero length\n\t" 11579 $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t" 11580 $$emit$$"DEC ECX\n\t" 11581 $$emit$$"JGE LOOP\n\t" 11582 $$emit$$"JMP DONE\n\t" 11583 $$emit$$"# LARGE:\n\t" 11584 if (UseFastStosb) { 11585 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11586 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11587 } else if (UseXMMForObjInit) { 11588 $$emit$$"MOV RDI,RAX\n\t" 11589 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11590 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11591 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11592 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11593 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11594 $$emit$$"ADD 0x40,RAX\n\t" 11595 $$emit$$"# L_zero_64_bytes:\n\t" 11596 $$emit$$"SUB 0x8,RCX\n\t" 11597 $$emit$$"JGE L_loop\n\t" 11598 $$emit$$"ADD 0x4,RCX\n\t" 11599 $$emit$$"JL L_tail\n\t" 11600 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11601 $$emit$$"ADD 0x20,RAX\n\t" 11602 $$emit$$"SUB 0x4,RCX\n\t" 11603 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11604 $$emit$$"ADD 0x4,RCX\n\t" 11605 $$emit$$"JLE L_end\n\t" 11606 $$emit$$"DEC RCX\n\t" 11607 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11608 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11609 $$emit$$"ADD 0x8,RAX\n\t" 11610 $$emit$$"DEC RCX\n\t" 11611 $$emit$$"JGE L_sloop\n\t" 11612 $$emit$$"# L_end:\n\t" 11613 } else { 11614 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11615 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11616 } 11617 $$emit$$"# DONE" 11618 %} 11619 
ins_encode %{ 11620 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11621 $tmp$$XMMRegister, false, knoreg); 11622 %} 11623 ins_pipe( pipe_slow ); 11624 %} 11625 11626 // Small ClearArray AVX512 non-constant length. 11627 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11628 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2)); 11629 match(Set dummy (ClearArray cnt base)); 11630 ins_cost(125); 11631 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); 11632 11633 format %{ $$template 11634 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11635 $$emit$$"CMP InitArrayShortSize,rcx\n\t" 11636 $$emit$$"JG LARGE\n\t" 11637 $$emit$$"SHL ECX, 1\n\t" 11638 $$emit$$"DEC ECX\n\t" 11639 $$emit$$"JS DONE\t# Zero length\n\t" 11640 $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t" 11641 $$emit$$"DEC ECX\n\t" 11642 $$emit$$"JGE LOOP\n\t" 11643 $$emit$$"JMP DONE\n\t" 11644 $$emit$$"# LARGE:\n\t" 11645 if (UseFastStosb) { 11646 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11647 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11648 } else if (UseXMMForObjInit) { 11649 $$emit$$"MOV RDI,RAX\n\t" 11650 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11651 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11652 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11653 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11654 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11655 $$emit$$"ADD 0x40,RAX\n\t" 11656 $$emit$$"# L_zero_64_bytes:\n\t" 11657 $$emit$$"SUB 0x8,RCX\n\t" 11658 $$emit$$"JGE L_loop\n\t" 11659 $$emit$$"ADD 0x4,RCX\n\t" 11660 $$emit$$"JL L_tail\n\t" 11661 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11662 $$emit$$"ADD 0x20,RAX\n\t" 11663 $$emit$$"SUB 0x4,RCX\n\t" 11664 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11665 $$emit$$"ADD 0x4,RCX\n\t" 11666 $$emit$$"JLE L_end\n\t" 11667 $$emit$$"DEC RCX\n\t" 11668 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11669 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11670 $$emit$$"ADD 0x8,RAX\n\t" 
11671 $$emit$$"DEC RCX\n\t" 11672 $$emit$$"JGE L_sloop\n\t" 11673 $$emit$$"# L_end:\n\t" 11674 } else { 11675 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11676 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11677 } 11678 $$emit$$"# DONE" 11679 %} 11680 ins_encode %{ 11681 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11682 $tmp$$XMMRegister, false, $ktmp$$KRegister); 11683 %} 11684 ins_pipe( pipe_slow ); 11685 %} 11686 11687 // Large ClearArray non-AVX512. 11688 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11689 predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large()); 11690 match(Set dummy (ClearArray cnt base)); 11691 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); 11692 format %{ $$template 11693 if (UseFastStosb) { 11694 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11695 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11696 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11697 } else if (UseXMMForObjInit) { 11698 $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t" 11699 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11700 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11701 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11702 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11703 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11704 $$emit$$"ADD 0x40,RAX\n\t" 11705 $$emit$$"# L_zero_64_bytes:\n\t" 11706 $$emit$$"SUB 0x8,RCX\n\t" 11707 $$emit$$"JGE L_loop\n\t" 11708 $$emit$$"ADD 0x4,RCX\n\t" 11709 $$emit$$"JL L_tail\n\t" 11710 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11711 $$emit$$"ADD 0x20,RAX\n\t" 11712 $$emit$$"SUB 0x4,RCX\n\t" 11713 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11714 $$emit$$"ADD 0x4,RCX\n\t" 11715 $$emit$$"JLE L_end\n\t" 11716 $$emit$$"DEC RCX\n\t" 11717 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11718 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11719 $$emit$$"ADD 0x8,RAX\n\t" 11720 $$emit$$"DEC RCX\n\t" 11721 $$emit$$"JGE L_sloop\n\t" 11722 $$emit$$"# L_end:\n\t" 11723 } else { 
11724 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11725 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11726 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11727 } 11728 $$emit$$"# DONE" 11729 %} 11730 ins_encode %{ 11731 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11732 $tmp$$XMMRegister, true, knoreg); 11733 %} 11734 ins_pipe( pipe_slow ); 11735 %} 11736 11737 // Large ClearArray AVX512. 11738 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11739 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large()); 11740 match(Set dummy (ClearArray cnt base)); 11741 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); 11742 format %{ $$template 11743 if (UseFastStosb) { 11744 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11745 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11746 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11747 } else if (UseXMMForObjInit) { 11748 $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t" 11749 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11750 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11751 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11752 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11753 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11754 $$emit$$"ADD 0x40,RAX\n\t" 11755 $$emit$$"# L_zero_64_bytes:\n\t" 11756 $$emit$$"SUB 0x8,RCX\n\t" 11757 $$emit$$"JGE L_loop\n\t" 11758 $$emit$$"ADD 0x4,RCX\n\t" 11759 $$emit$$"JL L_tail\n\t" 11760 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11761 $$emit$$"ADD 0x20,RAX\n\t" 11762 $$emit$$"SUB 0x4,RCX\n\t" 11763 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11764 $$emit$$"ADD 0x4,RCX\n\t" 11765 $$emit$$"JLE L_end\n\t" 11766 $$emit$$"DEC RCX\n\t" 11767 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11768 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11769 $$emit$$"ADD 0x8,RAX\n\t" 11770 $$emit$$"DEC RCX\n\t" 11771 $$emit$$"JGE L_sloop\n\t" 11772 $$emit$$"# L_end:\n\t" 11773 } else { 11774 $$emit$$"XOR EAX,EAX\t# 
ClearArray:\n\t" 11775 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11776 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11777 } 11778 $$emit$$"# DONE" 11779 %} 11780 ins_encode %{ 11781 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11782 $tmp$$XMMRegister, true, $ktmp$$KRegister); 11783 %} 11784 ins_pipe( pipe_slow ); 11785 %} 11786 11787 // Small ClearArray AVX512 constant length. 11788 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr) 11789 %{ 11790 predicate(!((ClearArrayNode*)n)->is_large() && 11791 ((UseAVX > 2) && VM_Version::supports_avx512vlbw())); 11792 match(Set dummy (ClearArray cnt base)); 11793 ins_cost(100); 11794 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr); 11795 format %{ "clear_mem_imm $base , $cnt \n\t" %} 11796 ins_encode %{ 11797 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister); 11798 %} 11799 ins_pipe(pipe_slow); 11800 %} 11801 11802 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11803 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11804 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11805 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11806 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11807 11808 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11809 ins_encode %{ 11810 __ string_compare($str1$$Register, $str2$$Register, 11811 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11812 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg); 11813 %} 11814 ins_pipe( pipe_slow ); 11815 %} 11816 11817 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11818 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ 11819 predicate(VM_Version::supports_avx512vlbw() && 
((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11820 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11821 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11822 11823 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11824 ins_encode %{ 11825 __ string_compare($str1$$Register, $str2$$Register, 11826 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11827 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister); 11828 %} 11829 ins_pipe( pipe_slow ); 11830 %} 11831 11832 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11833 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11834 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11835 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11836 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11837 11838 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11839 ins_encode %{ 11840 __ string_compare($str1$$Register, $str2$$Register, 11841 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11842 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg); 11843 %} 11844 ins_pipe( pipe_slow ); 11845 %} 11846 11847 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11848 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ 11849 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11850 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11851 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11852 11853 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11854 ins_encode %{ 11855 __ string_compare($str1$$Register, $str2$$Register, 11856 $cnt1$$Register, 
$cnt2$$Register, $result$$Register, 11857 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister); 11858 %} 11859 ins_pipe( pipe_slow ); 11860 %} 11861 11862 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11863 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11864 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11865 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11866 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11867 11868 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11869 ins_encode %{ 11870 __ string_compare($str1$$Register, $str2$$Register, 11871 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11872 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg); 11873 %} 11874 ins_pipe( pipe_slow ); 11875 %} 11876 11877 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11878 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ 11879 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11880 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11881 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11882 11883 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11884 ins_encode %{ 11885 __ string_compare($str1$$Register, $str2$$Register, 11886 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11887 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister); 11888 %} 11889 ins_pipe( pipe_slow ); 11890 %} 11891 11892 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, 11893 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11894 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); 11895 match(Set result (StrComp (Binary 
str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // Operands are passed swapped (str2/cnt2 first) for the UL encoding.
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// UL compare, EVEX variant: requires AVX512VL+BW and supplies a mask-register temp.
instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // Operands are passed swapped (str2/cnt2 first) for the UL encoding.
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
// Non-EVEX variant; the leading 'false' argument selects the string-equals
// flavor of arrays_equals (the array_equals variants below pass true).
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe(
pipe_slow );
%}

// StrEquals, EVEX variant (AVX512VL+BW): adds a mask-register temp $ktmp.
instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}


// fast search of substring with known size.
// LL (Latin1 pattern in Latin1 string); the substring length is a compile-time
// constant ($int_cnt2), which selects between the C8 and generic stubs below.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// UU (UTF-16 pattern in UTF-16 string); constant-length substring. The C8
// threshold is 8 elements here (16-bit chars) vs. 16 in the LL variant above.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// UL (Latin1 pattern in UTF-16 string); constant-length substring.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Substring search with a variable-length pattern (LL); the (-1) count selects
// the runtime-length path in the stub.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length substring search, UU encoding.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length substring search, UL encoding.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Index of a single char in a UTF-16 string (U encoding).
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                             eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Index of a single char in a Latin1 string (L encoding).
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __
stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// fast array equals
// byte[] element comparison (LL); the leading 'true' selects the array-equals
// flavor of the shared stub, false /* char */ selects byte-sized elements.
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// byte[] array equals, EVEX variant (AVX512VL+BW): adds mask-register temp.
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// char[] element comparison (UU), non-EVEX variant.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI
tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    // true /* char */ selects 16-bit element comparison.
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// char[] array equals, EVEX variant (AVX512VL+BW): adds mask-register temp.
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Count leading positive bytes in a byte[]; non-EVEX variant (also taken when
// BMI2 is unavailable — the EVEX stub needs both features, per the predicates).
instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                         regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// CountPositives, EVEX variant: requires AVX512VL+BW and BMI2; two mask temps.
instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
                              regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}


// fast char[] to byte[] compression
// Non-EVEX variant (also taken when BMI2 is unavailable).
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                         regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// StrCompressedCopy, EVEX variant: requires AVX512VL+BW and BMI2; two mask temps.
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                              regD tmp3, regD tmp4, kReg ktmp1, kReg
ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
// Non-EVEX variant; 'Universe dummy' means the node produces no value.
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// StrInflatedCopy, EVEX variant: requires AVX512VL+BW and BMI2; one mask temp.
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                             regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register,
$len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
// Final 'false' argument selects ISO-8859-1 (vs. ASCII) in the shared stub.
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ASCII
// Same stub as above with the ascii flag set to true.
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                            regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                            eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
// Signed register-register compare; sets eFlagsReg.
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP    $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Signed register-immediate compare; Con8or32 picks the short form when the
// immediate fits in 8 bits.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP    $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP    $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare register against zero via TEST reg,reg (shorter than CMP reg,0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST   $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & con) compared against zero, folded into a single TEST.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST   $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & mem) compared against zero, folded into a single TEST.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST   $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except
// they produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu   $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu   $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu  $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu   $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu   $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST   $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result.
// This should be ok since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST   $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
// NOTE: opcode 0xCC is a placeholder — the real code comes from min_enc.
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN    $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
// NOTE: opcode 0xCC is a placeholder — the real code comes from max_enc.
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX    $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): m1 appears to be unused below — verify against upstream.
    int m1 = (strd > 0) ?
1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
// Indirect jump through the constant table, indexed by switch_val.
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index), noreg);
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP    $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label
defines a relative address from Jcc+1 12656 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{ 12657 match(If cop cr); 12658 effect(USE labl); 12659 12660 ins_cost(300); 12661 format %{ "J$cop $labl" %} 12662 size(6); 12663 ins_encode %{ 12664 Label* L = $labl$$label; 12665 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12666 %} 12667 ins_pipe( pipe_jcc ); 12668 %} 12669 12670 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12671 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{ 12672 match(CountedLoopEnd cop cr); 12673 effect(USE labl); 12674 12675 ins_cost(300); 12676 format %{ "J$cop $labl\t# Loop end" %} 12677 size(6); 12678 ins_encode %{ 12679 Label* L = $labl$$label; 12680 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12681 %} 12682 ins_pipe( pipe_jcc ); 12683 %} 12684 12685 // Jump Direct Conditional - using unsigned comparison 12686 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12687 match(If cop cmp); 12688 effect(USE labl); 12689 12690 ins_cost(300); 12691 format %{ "J$cop,u $labl" %} 12692 size(6); 12693 ins_encode %{ 12694 Label* L = $labl$$label; 12695 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12696 %} 12697 ins_pipe(pipe_jcc); 12698 %} 12699 12700 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12701 match(If cop cmp); 12702 effect(USE labl); 12703 12704 ins_cost(200); 12705 format %{ "J$cop,u $labl" %} 12706 size(6); 12707 ins_encode %{ 12708 Label* L = $labl$$label; 12709 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12710 %} 12711 ins_pipe(pipe_jcc); 12712 %} 12713 12714 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 12715 match(If cop cmp); 12716 effect(USE labl); 12717 12718 ins_cost(200); 12719 format %{ $$template 12720 if ($cop$$cmpcode == Assembler::notEqual) { 12721 $$emit$$"JP,u $labl\n\t" 12722 $$emit$$"J$cop,u $labl" 
12723 } else { 12724 $$emit$$"JP,u done\n\t" 12725 $$emit$$"J$cop,u $labl\n\t" 12726 $$emit$$"done:" 12727 } 12728 %} 12729 ins_encode %{ 12730 Label* l = $labl$$label; 12731 if ($cop$$cmpcode == Assembler::notEqual) { 12732 __ jcc(Assembler::parity, *l, false); 12733 __ jcc(Assembler::notEqual, *l, false); 12734 } else if ($cop$$cmpcode == Assembler::equal) { 12735 Label done; 12736 __ jccb(Assembler::parity, done); 12737 __ jcc(Assembler::equal, *l, false); 12738 __ bind(done); 12739 } else { 12740 ShouldNotReachHere(); 12741 } 12742 %} 12743 ins_pipe(pipe_jcc); 12744 %} 12745 12746 // ============================================================================ 12747 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass 12748 // array for an instance of the superklass. Set a hidden internal cache on a 12749 // hit (cache is checked with exposed code in gen_subtype_check()). Return 12750 // NZ for a miss or zero for a hit. The encoding ALSO sets flags. 12751 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{ 12752 match(Set result (PartialSubtypeCheck sub super)); 12753 effect( KILL rcx, KILL cr ); 12754 12755 ins_cost(1100); // slightly larger than the next version 12756 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12757 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12758 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12759 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12760 "JNE,s miss\t\t# Missed: EDI not-zero\n\t" 12761 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t" 12762 "XOR $result,$result\t\t Hit: EDI zero\n\t" 12763 "miss:\t" %} 12764 12765 opcode(0x1); // Force a XOR of EDI 12766 ins_encode( enc_PartialSubtypeCheck() ); 12767 ins_pipe( pipe_slow ); 12768 %} 12769 12770 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, 
eDIRegP result, immP0 zero ) %{ 12771 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); 12772 effect( KILL rcx, KILL result ); 12773 12774 ins_cost(1000); 12775 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12776 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12777 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12778 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12779 "JNE,s miss\t\t# Missed: flags NZ\n\t" 12780 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t" 12781 "miss:\t" %} 12782 12783 opcode(0x0); // No need to XOR EDI 12784 ins_encode( enc_PartialSubtypeCheck() ); 12785 ins_pipe( pipe_slow ); 12786 %} 12787 12788 // ============================================================================ 12789 // Branch Instructions -- short offset versions 12790 // 12791 // These instructions are used to replace jumps of a long offset (the default 12792 // match) with jumps of a shorter offset. These instructions are all tagged 12793 // with the ins_short_branch attribute, which causes the ADLC to suppress the 12794 // match rules in general matching. Instead, the ADLC generates a conversion 12795 // method in the MachNode which can be used to do in-place replacement of the 12796 // long variant with the shorter variant. The compiler will determine if a 12797 // branch can be taken by the is_short_branch_offset() predicate in the machine 12798 // specific code section of the file. 
// Jump Direct - Label defines a relative address from JMP+1
// size(2): 1 opcode byte + 1-byte rel8 displacement (JMP short form).
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short-branch version of the two-jump unordered-compare form.
// size(4): two 2-byte Jcc rel8 instructions.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers.  Comparing such values efficiently
// is tricky.  The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test.  The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code.  The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf.  One fix is to not match both
// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.
// Produces -1/0/+1 in dst; dst is constrained to ESI (eSIRegI).
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // High halves compare signed; low halves compare unsigned (below).
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Compare against zero: only the sign of the high word matters for LT/GE,
// so a TEST of src.hi against itself is sufficient.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Unsigned variant of the LTGE zero-compare: same TEST of the high word.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// CMOVE a long (two CMOVcc instructions, low then high half) when the
// flags came from an LT/GE long compare.  Requires CMOV hardware support.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned (_U) wrappers: same code as the signed versions, reached through
// the unsigned flags register class; they simply expand to the signed rule.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LTGE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// Float/double variants dispatch on UseSSE to pick FPU-stack (DPR/FPR) or
// XMM (D/F) conditional-move expansions.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// OR of the low and high halves is zero iff the long is zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
// Branch on an unsigned-long EQ/NE compare; expands to a plain jmpCon.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// CMOVE int/ptr/float variants for EQ/NE long-compare flags; structure
// mirrors the LTGE family above.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned (_U) wrappers expand to the signed rules.
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_EQNE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_EQNE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_EQNE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// Float/double variants dispatch on UseSSE (FPU-stack vs XMM), as in the
// LTGE family.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// NOTE(review): this format writes "$src.hi+4" where the LTGE/EQNE mem
// variants write "$src.hi" -- debug-format inconsistency only; the
// encodings are identical.  Confirm which spelling is intended.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned variants: same encodings, but matched against the unsigned
// flags register class so the allocator keeps signed/unsigned flags distinct.
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LEGT(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 form; SSE2 not in use for doubles)
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM form; requires SSE2)
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
  match(TailCall jump_target method_ptr);
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
                 _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
  predicate(LockingMode != LM_LIGHTWEIGHT && !Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, $thread$$Register, nullptr, nullptr, nullptr, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object eax_reg));
  effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// NOTE(review): instruct name says LT32 but the predicate is <= 32 and the
// format text says LE32 -- the predicate is what matters; names are cosmetic.
instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Safepoint Instruction
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();
    // The guarantee checks only the opcode byte at pre_pc; post_pc is computed
    // but not otherwise consulted here.
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.