//
// Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
// For the general registers below it is the hardware x86 register number
// (EAX=0, ECX=1, EDX=2, EBX=3, ESP=4, EBP=5, ESI=6, EDI=7).

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
// Each x87 register is described as a Low/High pair of 32-bit halves so the
// allocator can spill a 64-bit double as two adjacent ideal-F slots.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
// (encodings 8-15, never bound to a real VMReg)
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Most classes below come in a with_ebp/no_ebp pair combined by a
// reg_class_dynamic: when PreserveFramePointer is set, EBP is reserved for
// the frame pointer and the no_ebp variant is used.
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs (low,high per pair, matching chunk0 order)
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
reg_class ebpd_reg( EBP,EDI );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register.
// The +2 matches the long-pair encodings above: EDX:EAX (2,0),
// EBX:ECX (3,1), EDI:EBP (7,5).
#define HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
#define HIGH_FROM_LOW_ENC(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// No dynamic register-mask setup is needed on x86_32.
void reg_mask_init() {}

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Rounds 'adr' down to a 16-byte boundary and stores the 128-bit value
// {lo, hi} there; returns the aligned address. The caller must supply a
// buffer with at least 16 bytes of slack (see fp_signmask_pool below).
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted before a call by the pre-call reset sequence
// (optional fldcw when compiling in 24-bit FP mode, optional vzeroupper).
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size in bytes of the emitted FFree_Float_Stack_All sequence; set when it
// is first emitted (must happen before any runtime-call offset is queried).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1;                      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5;                      // skip MOV instruction
  current_offset += 1;                      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModRM (or SIB) byte: mode (2 bits) | reg/opcode (3 bits) | r/m (3 bits).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with a condition code folded into its low nibble.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
                    int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
                    int format) {
#ifdef ASSERT
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits 'opcode' with an [ESP+disp] operand; chooses an 8- or 32-bit
// displacement form. ESP-based addressing always needs a SIB byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );                 // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);                      // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);                      // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModRM/SIB/displacement bytes addressing [base + index*2^scale +
// displace] with 'reg_encoding' in the reg field. index==0x4 means "no
// index"; base==-1 means absolute address; a non-none disp_reloc forces the
// 32-bit displacement form and records a relocation for it.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Emit a 32-bit register-to-register move (MOV r32, r/m32); a self-move
// is elided entirely.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst': -1 for less-than or
// unordered (parity set), 0 for equal, 1 for greater-than. Assumes the
// flags were just set by a [u]comiss/[u]comisd.
static void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0; // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog; mirrors the instruction sequence that
// MachPrologNode::emit produces (via MacroAssembler::verified_entry).
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != nullptr);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog; mirrors the byte sequence MachEpilogNode::emit
// produces below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL rsp, poll_offset[thread] \n\t"
              "JA #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize (imm32 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize (sign-extended imm8 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // pop EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    MacroAssembler masm(&cbuf);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//============================================================================= 734 735 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack }; 736 static enum RC rc_class( OptoReg::Name reg ) { 737 738 if( !OptoReg::is_valid(reg) ) return rc_bad; 739 if (OptoReg::is_stack(reg)) return rc_stack; 740 741 VMReg r = OptoReg::as_VMReg(reg); 742 if (r->is_Register()) return rc_int; 743 if (r->is_FloatRegister()) { 744 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 745 return rc_float; 746 } 747 if (r->is_KRegister()) return rc_kreg; 748 assert(r->is_XMMRegister(), "must be"); 749 return rc_xmm; 750 } 751 752 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, 753 int opcode, const char *op_str, int size, outputStream* st ) { 754 if( cbuf ) { 755 emit_opcode (*cbuf, opcode ); 756 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); 757 #ifndef PRODUCT 758 } else if( !do_size ) { 759 if( size != 0 ) st->print("\n\t"); 760 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 761 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 762 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 763 } else { // FLD, FST, PUSH, POP 764 st->print("%s [ESP + #%d]",op_str,offset); 765 } 766 #endif 767 } 768 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 769 return size+3+offset_size; 770 } 771 772 // Helper for XMM registers. Extra opcode bits, limited syntax. 
// Spill/reload between an XMM register and the stack ([ESP + offset]).
// A register pair (reg_lo+1 == reg_hi) means a 64-bit double; otherwise a
// 32-bit float.  Returns the accumulated encoded size in bytes; with AVX-512
// the displacement may compress to one byte (disp8*N) and the EVEX prefix
// adds two bytes over the VEX/SSE encoding.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                              Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                              offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                              Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                              offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Size accounting must agree with the MacroAssembler emission above.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    // AVX-512 disp8*N compression: many offsets fit a single displacement byte.
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy (float or double, chosen by whether the lo/hi
// register names form an adjacent pair).  Returns accumulated size in bytes.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Copy a 32-bit GPR into an XMM register (MOVD).  Returns total size in
// bytes; note this returns its own size rather than accumulating 'size'.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// Copy an XMM register into a 32-bit GPR (MOVD).  Mirror of the helper above.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer register-to-register copy: MOV r32, r/m32 (0x8B + ModRM = 2 bytes).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(cbuf, 0x8B );
    emit_rm (cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 FPU register to [ESP + offset].  If the source is not already
// at the top of the FP stack it is FLD'ed first (then stored with a popping
// FST).  The reg-number arguments passed to impl_helper are really the x87
// ModRM opcode extensions: EBX_num encodes /3 (FSTP), EDX_num encodes /2 (FST).
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
                     int src_hi, int dst_hi, uint ireg, outputStream* st);

void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st);

// Vector stack-to-stack copy.  Small vectors (S/D) go through PUSH/POP pairs;
// larger vectors bounce through xmm0, saving/restoring it below ESP (in the
// area beyond the current stack pointer, which is scratch here).
static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    switch (ireg) {
    case Op_VecS: // 32 bits: one push/pop
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD: // 64 bits: two 32-bit push/pop pairs
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX: // 128 bits via xmm0
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY: // 256 bits via xmm0
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ: // 512 bits via xmm0 (EVEX, vector_len == 2)
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      // NOTE(review): debug text says "popq" but the emitted instruction is
      // popl (32-bit) -- debug-only formatting, matches upstream source.
      st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushl [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu xmm0, [rsp + #%d]\n\t"
                "movdqu [rsp + #%d], xmm0\n\t"
                "movdqu xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}

// Workhorse for spill copies.  Serves three callers through its arguments:
// emit code (cbuf != null), print assembly text (cbuf == null, !do_size), or
// compute encoded size (do_size).  Dispatches on the rc_class of the first
// (and, for 64-bit values, second) source/destination registers.  Returns
// the accumulated size in bytes (0 for the cases sized elsewhere).
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // Vector spill copies (but not predicate/mask vectors) are handled entirely
  // by the shared x86.ad helpers; their size is accounted for elsewhere.
  if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high half first so it is not clobbered by the low-half copy.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( src_first_rc == rc_stack && dst_first_rc == rc_int )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    // FLD+FSTP are 2 bytes each; a lone FST is 2 bytes.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    // FLD mem (3 + disp) followed by a 2-byte FSTP ST(i).
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    // x87 store to the freshly-reserved slot at [ESP], then xmm load from it.
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // AVX-512 opmask specific spilling.  Opmask (k) registers are always moved
  // as a pair; size is computed elsewhere (these return 0).
  if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    int offset = ra_->reg2offset(src_first);
    if (cbuf != nullptr) {
      MacroAssembler _masm(cbuf);
      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
    } else {
      st->print("KMOV %s, [ESP + %d]", Matcher::regName[dst_first], offset);
#endif
    }
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    int offset = ra_->reg2offset(dst_first);
    if (cbuf != nullptr) {
      MacroAssembler _masm(cbuf);
      __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else {
      st->print("KMOV [ESP + %d], %s", offset, Matcher::regName[src_first]);
#endif
    }
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    if (cbuf != nullptr) {
      MacroAssembler _masm(cbuf);
      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else {
      st->print("KMOV %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
#endif
    }
    return 0;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
// Debug formatting: run the workhorse in print mode (no code buffer).
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( nullptr, ra_, false, st );
}
#endif

// Code emission: run the workhorse with a real code buffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, nullptr );
}

// Size in bytes; too many cases to predict cheaply, so measure an emission.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}


//=============================================================================
#ifndef PRODUCT
// BoxLockNode materializes the address of a stack lock slot: LEA reg,[ESP+offset].
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    // 32-bit displacement form (mod = 0x2).
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    // 8-bit displacement form (mod = 0x1).
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// Must mirror the two encodings in emit() above: 7 bytes with disp32, 4 with disp8.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
// Unverified Entry Point: inline-cache check at the start of a compiled method.
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
  masm.ic_check(CodeEntryAlignment);
}

uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}


//=============================================================================

// Vector calling convention not supported.
bool Matcher::supports_vector_calling_convention() {
  return false;
}

OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  Unimplemented();
  return OptoRegPair(0, 0);
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

// Argument registers may be spilled (they are the only java args on 32-bit x86).
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

// Integer register pressure threshold for the allocator; -1 selects the default.
uint Matcher::int_pressure_limit()
{
  return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
}

// Float register pressure threshold for the allocator; -1 selects the default.
uint Matcher::float_pressure_limit()
{
  return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than code which uses multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
// (64-bit div/mod is not matched as divmodL on 32-bit x86.)
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// Recognizes two shapes: (AndL x con) with a zero high-half mask, and a
// ConL constant whose high half is zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
// MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Operand-size override prefix (0x66): selects 16-bit operand size.
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // ModRM byte for a register-to-register form (mod = 0x3).
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Opcode byte followed by a register-to-register ModRM byte.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV reg,0 using the short register-in-opcode form.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
    emit_d32 ( cbuf, 0x0 );                // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                          special case
    //
    // input : rax,: dividend                       min_int
    //         reg:  divisor                        -1
    //
    // output: rax,: quotient  (= rax, idiv reg)    min_int
    //         rdx:  remainder (= rax, irem reg)    0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);  // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);  // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                      // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops (register number folded into opcode)
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{  // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1624 // Check for 8-bit immediate, and set sign extend bit in opcode 1625 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1626 emit_opcode(cbuf, $primary | 0x02); } 1627 else { // If 32-bit immediate 1628 emit_opcode(cbuf, $primary); 1629 } 1630 // Emit r/m byte with secondary opcode, after primary opcode. 1631 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1632 %} 1633 1634 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1635 // Check for 8-bit immediate, and set sign extend bit in opcode 1636 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1637 $$$emit8$imm$$constant; 1638 } 1639 else { // If 32-bit immediate 1640 // Output immediate 1641 $$$emit32$imm$$constant; 1642 } 1643 %} 1644 1645 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1646 // Emit primary opcode and set sign-extend bit 1647 // Check for 8-bit immediate, and set sign extend bit in opcode 1648 int con = (int)$imm$$constant; // Throw away top bits 1649 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1650 // Emit r/m byte with secondary opcode, after primary opcode. 1651 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1652 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1653 else emit_d32(cbuf,con); 1654 %} 1655 1656 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1657 // Emit primary opcode and set sign-extend bit 1658 // Check for 8-bit immediate, and set sign extend bit in opcode 1659 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1660 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1661 // Emit r/m byte with tertiary opcode, after primary opcode. 
1662 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg)); 1663 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1664 else emit_d32(cbuf,con); 1665 %} 1666 1667 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1668 emit_cc(cbuf, $secondary, $dst$$reg ); 1669 %} 1670 1671 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1672 int destlo = $dst$$reg; 1673 int desthi = HIGH_FROM_LOW_ENC(destlo); 1674 // bswap lo 1675 emit_opcode(cbuf, 0x0F); 1676 emit_cc(cbuf, 0xC8, destlo); 1677 // bswap hi 1678 emit_opcode(cbuf, 0x0F); 1679 emit_cc(cbuf, 0xC8, desthi); 1680 // xchg lo and hi 1681 emit_opcode(cbuf, 0x87); 1682 emit_rm(cbuf, 0x3, destlo, desthi); 1683 %} 1684 1685 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1686 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1687 %} 1688 1689 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1690 $$$emit8$primary; 1691 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1692 %} 1693 1694 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1695 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1696 emit_d8(cbuf, op >> 8 ); 1697 emit_d8(cbuf, op & 255); 1698 %} 1699 1700 // emulate a CMOV with a conditional branch around a MOV 1701 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1702 // Invert sense of branch from sense of CMOV 1703 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1704 emit_d8( cbuf, $brOffs$$constant ); 1705 %} 1706 1707 enc_class enc_PartialSubtypeCheck( ) %{ 1708 Register Redi = as_Register(EDI_enc); // result register 1709 Register Reax = as_Register(EAX_enc); // super class 1710 Register Recx = as_Register(ECX_enc); // killed 1711 Register Resi = as_Register(ESI_enc); // sub class 1712 Label miss; 1713 1714 MacroAssembler _masm(&cbuf); 1715 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1716 nullptr, &miss, 1717 /*set_cond_codes:*/ true); 1718 if ($primary) { 1719 __ xorptr(Redi, Redi); 1720 } 1721 __ bind(miss); 1722 %} 1723 1724 enc_class FFree_Float_Stack_All %{ // 
Free_Float_Stack_All 1725 MacroAssembler masm(&cbuf); 1726 int start = masm.offset(); 1727 if (UseSSE >= 2) { 1728 if (VerifyFPU) { 1729 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1730 } 1731 } else { 1732 // External c_calling_convention expects the FPU stack to be 'clean'. 1733 // Compiled code leaves it dirty. Do cleanup now. 1734 masm.empty_FPU_stack(); 1735 } 1736 if (sizeof_FFree_Float_Stack_All == -1) { 1737 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1738 } else { 1739 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1740 } 1741 %} 1742 1743 enc_class Verify_FPU_For_Leaf %{ 1744 if( VerifyFPU ) { 1745 MacroAssembler masm(&cbuf); 1746 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1747 } 1748 %} 1749 1750 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1751 // This is the instruction starting address for relocation info. 1752 MacroAssembler _masm(&cbuf); 1753 cbuf.set_insts_mark(); 1754 $$$emit8$primary; 1755 // CALL directly to the runtime 1756 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1757 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1758 __ post_call_nop(); 1759 1760 if (UseSSE >= 2) { 1761 MacroAssembler _masm(&cbuf); 1762 BasicType rt = tf()->return_type(); 1763 1764 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1765 // A C runtime call where the return value is unused. In SSE2+ 1766 // mode the result needs to be removed from the FPU stack. It's 1767 // likely that this function call could be removed by the 1768 // optimizer if the C function is a pure function. 
1769 __ ffree(0); 1770 } else if (rt == T_FLOAT) { 1771 __ lea(rsp, Address(rsp, -4)); 1772 __ fstp_s(Address(rsp, 0)); 1773 __ movflt(xmm0, Address(rsp, 0)); 1774 __ lea(rsp, Address(rsp, 4)); 1775 } else if (rt == T_DOUBLE) { 1776 __ lea(rsp, Address(rsp, -8)); 1777 __ fstp_d(Address(rsp, 0)); 1778 __ movdbl(xmm0, Address(rsp, 0)); 1779 __ lea(rsp, Address(rsp, 8)); 1780 } 1781 } 1782 %} 1783 1784 enc_class pre_call_resets %{ 1785 // If method sets FPU control word restore it here 1786 debug_only(int off0 = cbuf.insts_size()); 1787 if (ra_->C->in_24_bit_fp_mode()) { 1788 MacroAssembler _masm(&cbuf); 1789 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 1790 } 1791 // Clear upper bits of YMM registers when current compiled code uses 1792 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1793 MacroAssembler _masm(&cbuf); 1794 __ vzeroupper(); 1795 debug_only(int off1 = cbuf.insts_size()); 1796 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1797 %} 1798 1799 enc_class post_call_FPU %{ 1800 // If method sets FPU control word do it here also 1801 if (Compile::current()->in_24_bit_fp_mode()) { 1802 MacroAssembler masm(&cbuf); 1803 masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 1804 } 1805 %} 1806 1807 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1808 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1809 // who we intended to call. 1810 MacroAssembler _masm(&cbuf); 1811 cbuf.set_insts_mark(); 1812 $$$emit8$primary; 1813 1814 if (!_method) { 1815 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1816 runtime_call_Relocation::spec(), 1817 RELOC_IMM32); 1818 __ post_call_nop(); 1819 } else { 1820 int method_index = resolved_method_index(cbuf); 1821 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1822 : static_call_Relocation::spec(method_index); 1823 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1824 rspec, RELOC_DISP32); 1825 __ post_call_nop(); 1826 address mark = cbuf.insts_mark(); 1827 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) { 1828 // Calls of the same statically bound method can share 1829 // a stub to the interpreter. 1830 cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off()); 1831 } else { 1832 // Emit stubs for static call. 1833 address stub = CompiledDirectCall::emit_to_interp_stub(cbuf, mark); 1834 if (stub == nullptr) { 1835 ciEnv::current()->record_failure("CodeCache is full"); 1836 return; 1837 } 1838 } 1839 } 1840 %} 1841 1842 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1843 MacroAssembler _masm(&cbuf); 1844 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1845 __ post_call_nop(); 1846 %} 1847 1848 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1849 int disp = in_bytes(Method::from_compiled_offset()); 1850 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1851 1852 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1853 MacroAssembler _masm(&cbuf); 1854 cbuf.set_insts_mark(); 1855 $$$emit8$primary; 1856 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1857 emit_d8(cbuf, disp); // Displacement 1858 __ post_call_nop(); 1859 %} 1860 1861 // Following encoding is no longer used, but may be restored if calling 1862 // convention changes significantly. 
1863 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1864 // 1865 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1866 // // int ic_reg = Matcher::inline_cache_reg(); 1867 // // int ic_encode = Matcher::_regEncode[ic_reg]; 1868 // // int imo_reg = Matcher::interpreter_method_reg(); 1869 // // int imo_encode = Matcher::_regEncode[imo_reg]; 1870 // 1871 // // // Interpreter expects method_ptr in EBX, currently a callee-saved register, 1872 // // // so we load it immediately before the call 1873 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_ptr 1874 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1875 // 1876 // // xor rbp,ebp 1877 // emit_opcode(cbuf, 0x33); 1878 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1879 // 1880 // // CALL to interpreter. 1881 // cbuf.set_insts_mark(); 1882 // $$$emit8$primary; 1883 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1884 // runtime_call_Relocation::spec(), RELOC_IMM32 ); 1885 // %} 1886 1887 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1888 $$$emit8$primary; 1889 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1890 $$$emit8$shift$$constant; 1891 %} 1892 1893 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1894 // Load immediate does not have a zero or sign extended version 1895 // for 8-bit immediates 1896 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1897 $$$emit32$src$$constant; 1898 %} 1899 1900 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1901 // Load immediate does not have a zero or sign extended version 1902 // for 8-bit immediates 1903 emit_opcode(cbuf, $primary + $dst$$reg); 1904 $$$emit32$src$$constant; 1905 %} 1906 1907 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1908 // Load immediate does not have a zero or sign extended version 1909 // for 8-bit immediates 1910 int dst_enc = $dst$$reg; 1911 int src_con = $src$$constant & 0x0FFFFFFFFL; 1912 if (src_con == 0) { 1913 // xor dst, dst 1914 
emit_opcode(cbuf, 0x33); 1915 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1916 } else { 1917 emit_opcode(cbuf, $primary + dst_enc); 1918 emit_d32(cbuf, src_con); 1919 } 1920 %} 1921 1922 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 1923 // Load immediate does not have a zero or sign extended version 1924 // for 8-bit immediates 1925 int dst_enc = $dst$$reg + 2; 1926 int src_con = ((julong)($src$$constant)) >> 32; 1927 if (src_con == 0) { 1928 // xor dst, dst 1929 emit_opcode(cbuf, 0x33); 1930 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1931 } else { 1932 emit_opcode(cbuf, $primary + dst_enc); 1933 emit_d32(cbuf, src_con); 1934 } 1935 %} 1936 1937 1938 // Encode a reg-reg copy. If it is useless, then empty encoding. 1939 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 1940 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 1941 %} 1942 1943 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 1944 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 1945 %} 1946 1947 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 1948 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1949 %} 1950 1951 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 1952 $$$emit8$primary; 1953 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1954 %} 1955 1956 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 1957 $$$emit8$secondary; 1958 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg)); 1959 %} 1960 1961 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 1962 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1963 %} 1964 1965 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 1966 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg)); 1967 %} 1968 1969 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 1970 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg)); 1971 %} 1972 1973 enc_class Con32 (immI src) %{ // Con32(storeImmI) 1974 // Output immediate 1975 $$$emit32$src$$constant; 1976 %} 1977 1978 enc_class 
Con32FPR_as_bits(immFPR src) %{ // storeF_imm 1979 // Output Float immediate bits 1980 jfloat jf = $src$$constant; 1981 int jf_as_bits = jint_cast( jf ); 1982 emit_d32(cbuf, jf_as_bits); 1983 %} 1984 1985 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 1986 // Output Float immediate bits 1987 jfloat jf = $src$$constant; 1988 int jf_as_bits = jint_cast( jf ); 1989 emit_d32(cbuf, jf_as_bits); 1990 %} 1991 1992 enc_class Con16 (immI src) %{ // Con16(storeImmI) 1993 // Output immediate 1994 $$$emit16$src$$constant; 1995 %} 1996 1997 enc_class Con_d32(immI src) %{ 1998 emit_d32(cbuf,$src$$constant); 1999 %} 2000 2001 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 2002 // Output immediate memory reference 2003 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 2004 emit_d32(cbuf, 0x00); 2005 %} 2006 2007 enc_class lock_prefix( ) %{ 2008 emit_opcode(cbuf,0xF0); // [Lock] 2009 %} 2010 2011 // Cmp-xchg long value. 2012 // Note: we need to swap rbx, and rcx before and after the 2013 // cmpxchg8 instruction because the instruction uses 2014 // rcx as the high order word of the new value to store but 2015 // our register encoding uses rbx,. 
2016 enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{ 2017 2018 // XCHG rbx,ecx 2019 emit_opcode(cbuf,0x87); 2020 emit_opcode(cbuf,0xD9); 2021 // [Lock] 2022 emit_opcode(cbuf,0xF0); 2023 // CMPXCHG8 [Eptr] 2024 emit_opcode(cbuf,0x0F); 2025 emit_opcode(cbuf,0xC7); 2026 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2027 // XCHG rbx,ecx 2028 emit_opcode(cbuf,0x87); 2029 emit_opcode(cbuf,0xD9); 2030 %} 2031 2032 enc_class enc_cmpxchg(eSIRegP mem_ptr) %{ 2033 // [Lock] 2034 emit_opcode(cbuf,0xF0); 2035 2036 // CMPXCHG [Eptr] 2037 emit_opcode(cbuf,0x0F); 2038 emit_opcode(cbuf,0xB1); 2039 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2040 %} 2041 2042 enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{ 2043 // [Lock] 2044 emit_opcode(cbuf,0xF0); 2045 2046 // CMPXCHGB [Eptr] 2047 emit_opcode(cbuf,0x0F); 2048 emit_opcode(cbuf,0xB0); 2049 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2050 %} 2051 2052 enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{ 2053 // [Lock] 2054 emit_opcode(cbuf,0xF0); 2055 2056 // 16-bit mode 2057 emit_opcode(cbuf, 0x66); 2058 2059 // CMPXCHGW [Eptr] 2060 emit_opcode(cbuf,0x0F); 2061 emit_opcode(cbuf,0xB1); 2062 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2063 %} 2064 2065 enc_class enc_flags_ne_to_boolean( iRegI res ) %{ 2066 int res_encoding = $res$$reg; 2067 2068 // MOV res,0 2069 emit_opcode( cbuf, 0xB8 + res_encoding); 2070 emit_d32( cbuf, 0 ); 2071 // JNE,s fail 2072 emit_opcode(cbuf,0x75); 2073 emit_d8(cbuf, 5 ); 2074 // MOV res,1 2075 emit_opcode( cbuf, 0xB8 + res_encoding); 2076 emit_d32( cbuf, 1 ); 2077 // fail: 2078 %} 2079 2080 enc_class set_instruction_start( ) %{ 2081 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2082 %} 2083 2084 enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem 2085 int reg_encoding = $ereg$$reg; 2086 int base = $mem$$base; 2087 int index = $mem$$index; 2088 int scale = $mem$$scale; 2089 int displace = $mem$$disp; 2090 relocInfo::relocType disp_reloc = $mem->disp_reloc(); 2091 encode_RegMem(cbuf, reg_encoding, base, 
index, scale, displace, disp_reloc); 2092 %} 2093 2094 enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem 2095 int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg); // Hi register of pair, computed from lo 2096 int base = $mem$$base; 2097 int index = $mem$$index; 2098 int scale = $mem$$scale; 2099 int displace = $mem$$disp + 4; // Offset is 4 further in memory 2100 assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" ); 2101 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none); 2102 %} 2103 2104 enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{ 2105 int r1, r2; 2106 if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW_ENC($dst$$reg); } 2107 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW_ENC($dst$$reg); } 2108 emit_opcode(cbuf,0x0F); 2109 emit_opcode(cbuf,$tertiary); 2110 emit_rm(cbuf, 0x3, r1, r2); 2111 emit_d8(cbuf,$cnt$$constant); 2112 emit_d8(cbuf,$primary); 2113 emit_rm(cbuf, 0x3, $secondary, r1); 2114 emit_d8(cbuf,$cnt$$constant); 2115 %} 2116 2117 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ 2118 emit_opcode( cbuf, 0x8B ); // Move 2119 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg)); 2120 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2121 emit_d8(cbuf,$primary); 2122 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 2123 emit_d8(cbuf,$cnt$$constant-32); 2124 } 2125 emit_d8(cbuf,$primary); 2126 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg)); 2127 emit_d8(cbuf,31); 2128 %} 2129 2130 enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{ 2131 int r1, r2; 2132 if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW_ENC($dst$$reg); } 2133 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW_ENC($dst$$reg); } 2134 2135 emit_opcode( cbuf, 0x8B ); // Move r1,r2 2136 emit_rm(cbuf, 0x3, r1, r2); 2137 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2138 emit_opcode(cbuf,$primary); 2139 emit_rm(cbuf, 0x3, $secondary, r1); 2140 
emit_d8(cbuf,$cnt$$constant-32); 2141 } 2142 emit_opcode(cbuf,0x33); // XOR r2,r2 2143 emit_rm(cbuf, 0x3, r2, r2); 2144 %} 2145 2146 // Clone of RegMem but accepts an extra parameter to access each 2147 // half of a double in memory; it never needs relocation info. 2148 enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{ 2149 emit_opcode(cbuf,$opcode$$constant); 2150 int reg_encoding = $rm_reg$$reg; 2151 int base = $mem$$base; 2152 int index = $mem$$index; 2153 int scale = $mem$$scale; 2154 int displace = $mem$$disp + $disp_for_half$$constant; 2155 relocInfo::relocType disp_reloc = relocInfo::none; 2156 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2157 %} 2158 2159 // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!! 2160 // 2161 // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant 2162 // and it never needs relocation information. 2163 // Frequently used to move data between FPU's Stack Top and memory. 
2164 enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{ 2165 int rm_byte_opcode = $rm_opcode$$constant; 2166 int base = $mem$$base; 2167 int index = $mem$$index; 2168 int scale = $mem$$scale; 2169 int displace = $mem$$disp; 2170 assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" ); 2171 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none); 2172 %} 2173 2174 enc_class RMopc_Mem (immI rm_opcode, memory mem) %{ 2175 int rm_byte_opcode = $rm_opcode$$constant; 2176 int base = $mem$$base; 2177 int index = $mem$$index; 2178 int scale = $mem$$scale; 2179 int displace = $mem$$disp; 2180 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2181 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 2182 %} 2183 2184 enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea 2185 int reg_encoding = $dst$$reg; 2186 int base = $src0$$reg; // 0xFFFFFFFF indicates no base 2187 int index = 0x04; // 0x04 indicates no index 2188 int scale = 0x00; // 0x00 indicates no scale 2189 int displace = $src1$$constant; // 0x00 indicates no displacement 2190 relocInfo::relocType disp_reloc = relocInfo::none; 2191 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2192 %} 2193 2194 enc_class min_enc (rRegI dst, rRegI src) %{ // MIN 2195 // Compare dst,src 2196 emit_opcode(cbuf,0x3B); 2197 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2198 // jmp dst < src around move 2199 emit_opcode(cbuf,0x7C); 2200 emit_d8(cbuf,2); 2201 // move dst,src 2202 emit_opcode(cbuf,0x8B); 2203 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2204 %} 2205 2206 enc_class max_enc (rRegI dst, rRegI src) %{ // MAX 2207 // Compare dst,src 2208 emit_opcode(cbuf,0x3B); 2209 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2210 // jmp dst > src around move 2211 emit_opcode(cbuf,0x7F); 2212 emit_d8(cbuf,2); 2213 // move dst,src 2214 emit_opcode(cbuf,0x8B); 
2215 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2216 %} 2217 2218 enc_class enc_FPR_store(memory mem, regDPR src) %{ 2219 // If src is FPR1, we can just FST to store it. 2220 // Else we need to FLD it to FPR1, then FSTP to store/pop it. 2221 int reg_encoding = 0x2; // Just store 2222 int base = $mem$$base; 2223 int index = $mem$$index; 2224 int scale = $mem$$scale; 2225 int displace = $mem$$disp; 2226 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2227 if( $src$$reg != FPR1L_enc ) { 2228 reg_encoding = 0x3; // Store & pop 2229 emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it) 2230 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2231 } 2232 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2233 emit_opcode(cbuf,$primary); 2234 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2235 %} 2236 2237 enc_class neg_reg(rRegI dst) %{ 2238 // NEG $dst 2239 emit_opcode(cbuf,0xF7); 2240 emit_rm(cbuf, 0x3, 0x03, $dst$$reg ); 2241 %} 2242 2243 enc_class setLT_reg(eCXRegI dst) %{ 2244 // SETLT $dst 2245 emit_opcode(cbuf,0x0F); 2246 emit_opcode(cbuf,0x9C); 2247 emit_rm( cbuf, 0x3, 0x4, $dst$$reg ); 2248 %} 2249 2250 enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT 2251 int tmpReg = $tmp$$reg; 2252 2253 // SUB $p,$q 2254 emit_opcode(cbuf,0x2B); 2255 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg); 2256 // SBB $tmp,$tmp 2257 emit_opcode(cbuf,0x1B); 2258 emit_rm(cbuf, 0x3, tmpReg, tmpReg); 2259 // AND $tmp,$y 2260 emit_opcode(cbuf,0x23); 2261 emit_rm(cbuf, 0x3, tmpReg, $y$$reg); 2262 // ADD $p,$tmp 2263 emit_opcode(cbuf,0x03); 2264 emit_rm(cbuf, 0x3, $p$$reg, tmpReg); 2265 %} 2266 2267 enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{ 2268 // TEST shift,32 2269 emit_opcode(cbuf,0xF7); 2270 emit_rm(cbuf, 0x3, 0, ECX_enc); 2271 emit_d32(cbuf,0x20); 2272 // JEQ,s small 2273 emit_opcode(cbuf, 0x74); 2274 emit_d8(cbuf, 0x04); 2275 // MOV $dst.hi,$dst.lo 2276 
emit_opcode( cbuf, 0x8B ); 2277 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg ); 2278 // CLR $dst.lo 2279 emit_opcode(cbuf, 0x33); 2280 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg); 2281 // small: 2282 // SHLD $dst.hi,$dst.lo,$shift 2283 emit_opcode(cbuf,0x0F); 2284 emit_opcode(cbuf,0xA5); 2285 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg)); 2286 // SHL $dst.lo,$shift" 2287 emit_opcode(cbuf,0xD3); 2288 emit_rm(cbuf, 0x3, 0x4, $dst$$reg ); 2289 %} 2290 2291 enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{ 2292 // TEST shift,32 2293 emit_opcode(cbuf,0xF7); 2294 emit_rm(cbuf, 0x3, 0, ECX_enc); 2295 emit_d32(cbuf,0x20); 2296 // JEQ,s small 2297 emit_opcode(cbuf, 0x74); 2298 emit_d8(cbuf, 0x04); 2299 // MOV $dst.lo,$dst.hi 2300 emit_opcode( cbuf, 0x8B ); 2301 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2302 // CLR $dst.hi 2303 emit_opcode(cbuf, 0x33); 2304 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg)); 2305 // small: 2306 // SHRD $dst.lo,$dst.hi,$shift 2307 emit_opcode(cbuf,0x0F); 2308 emit_opcode(cbuf,0xAD); 2309 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg); 2310 // SHR $dst.hi,$shift" 2311 emit_opcode(cbuf,0xD3); 2312 emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) ); 2313 %} 2314 2315 enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ 2316 // TEST shift,32 2317 emit_opcode(cbuf,0xF7); 2318 emit_rm(cbuf, 0x3, 0, ECX_enc); 2319 emit_d32(cbuf,0x20); 2320 // JEQ,s small 2321 emit_opcode(cbuf, 0x74); 2322 emit_d8(cbuf, 0x05); 2323 // MOV $dst.lo,$dst.hi 2324 emit_opcode( cbuf, 0x8B ); 2325 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2326 // SAR $dst.hi,31 2327 emit_opcode(cbuf, 0xC1); 2328 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) ); 2329 emit_d8(cbuf, 0x1F ); 2330 // small: 2331 // SHRD $dst.lo,$dst.hi,$shift 2332 emit_opcode(cbuf,0x0F); 2333 emit_opcode(cbuf,0xAD); 2334 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg); 
2335 // SAR $dst.hi,$shift" 2336 emit_opcode(cbuf,0xD3); 2337 emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) ); 2338 %} 2339 2340 2341 // ----------------- Encodings for floating point unit ----------------- 2342 // May leave result in FPU-TOS or FPU reg depending on opcodes 2343 enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV 2344 $$$emit8$primary; 2345 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); 2346 %} 2347 2348 // Pop argument in FPR0 with FSTP ST(0) 2349 enc_class PopFPU() %{ 2350 emit_opcode( cbuf, 0xDD ); 2351 emit_d8( cbuf, 0xD8 ); 2352 %} 2353 2354 // !!!!! equivalent to Pop_Reg_F 2355 enc_class Pop_Reg_DPR( regDPR dst ) %{ 2356 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2357 emit_d8( cbuf, 0xD8+$dst$$reg ); 2358 %} 2359 2360 enc_class Push_Reg_DPR( regDPR dst ) %{ 2361 emit_opcode( cbuf, 0xD9 ); 2362 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 2363 %} 2364 2365 enc_class strictfp_bias1( regDPR dst ) %{ 2366 emit_opcode( cbuf, 0xDB ); // FLD m80real 2367 emit_opcode( cbuf, 0x2D ); 2368 emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() ); 2369 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2370 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2371 %} 2372 2373 enc_class strictfp_bias2( regDPR dst ) %{ 2374 emit_opcode( cbuf, 0xDB ); // FLD m80real 2375 emit_opcode( cbuf, 0x2D ); 2376 emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() ); 2377 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2378 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2379 %} 2380 2381 // Special case for moving an integer register to a stack slot. 2382 enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2383 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp ); 2384 %} 2385 2386 // Special case for moving a register to a stack slot. 
2387 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2388 // Opcode already emitted 2389 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2390 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2391 emit_d32(cbuf, $dst$$disp); // Displacement 2392 %} 2393 2394 // Push the integer in stackSlot 'src' onto FP-stack 2395 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2396 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2397 %} 2398 2399 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2400 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2401 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2402 %} 2403 2404 // Same as Pop_Mem_F except for opcode 2405 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2406 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2407 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2408 %} 2409 2410 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2411 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2412 emit_d8( cbuf, 0xD8+$dst$$reg ); 2413 %} 2414 2415 enc_class Push_Reg_FPR( regFPR dst ) %{ 2416 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2417 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2418 %} 2419 2420 // Push FPU's float to a stack-slot, and pop FPU-stack 2421 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2422 int pop = 0x02; 2423 if ($src$$reg != FPR1L_enc) { 2424 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2425 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2426 pop = 0x03; 2427 } 2428 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2429 %} 2430 2431 // Push FPU's double to a stack-slot, and pop FPU-stack 2432 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2433 int pop = 0x02; 2434 if ($src$$reg != FPR1L_enc) { 2435 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2436 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2437 pop = 0x03; 2438 } 2439 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2440 %} 2441 2442 // Push FPU's double to 
a FPU-stack-slot, and pop FPU-stack 2443 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 2444 int pop = 0xD0 - 1; // -1 since we skip FLD 2445 if ($src$$reg != FPR1L_enc) { 2446 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 2447 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2448 pop = 0xD8; 2449 } 2450 emit_opcode( cbuf, 0xDD ); 2451 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) 2452 %} 2453 2454
// Load dst onto the FPU stack top; if src is not already FPR1, rotate the FPU stack (FINCSTP / FXCH src / FDECSTP) so src ends up in FPR1.
2455 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 2456 // load dst in FPR0 2457 emit_opcode( cbuf, 0xD9 ); 2458 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2459 if ($src$$reg != FPR1L_enc) { 2460 // fincstp 2461 emit_opcode (cbuf, 0xD9); 2462 emit_opcode (cbuf, 0xF7); 2463 // swap src with FPR1: 2464 // FXCH FPR1 with src 2465 emit_opcode(cbuf, 0xD9); 2466 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2467 // fdecstp 2468 emit_opcode (cbuf, 0xD9); 2469 emit_opcode (cbuf, 0xF6); 2470 } 2471 %} 2472
// Copy two XMM doubles onto the x87 stack through an 8-byte stack temp: src0 ends up in ST(0), src1 in ST(1).
2473 enc_class Push_ModD_encoding(regD src0, regD src1) %{ 2474 MacroAssembler _masm(&cbuf); 2475 __ subptr(rsp, 8); 2476 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2477 __ fld_d(Address(rsp, 0)); 2478 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2479 __ fld_d(Address(rsp, 0)); 2480 %} 2481
// Same as Push_ModD_encoding, but for two XMM floats through a 4-byte temp.
2482 enc_class Push_ModF_encoding(regF src0, regF src1) %{ 2483 MacroAssembler _masm(&cbuf); 2484 __ subptr(rsp, 4); 2485 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2486 __ fld_s(Address(rsp, 0)); 2487 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2488 __ fld_s(Address(rsp, 0)); 2489 %} 2490
// Pop the x87 result into an XMM double and release the 8-byte stack temp.
2491 enc_class Push_ResultD(regD dst) %{ 2492 MacroAssembler _masm(&cbuf); 2493 __ fstp_d(Address(rsp, 0)); 2494 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2495 __ addptr(rsp, 8); 2496 %} 2497
// Pop the x87 result into an XMM float; d8 is the number of temp stack bytes to release.
2498 enc_class Push_ResultF(regF dst, immI d8) %{ 2499 MacroAssembler _masm(&cbuf); 2500 __ fstp_s(Address(rsp, 0)); 2501 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2502 __ addptr(rsp, $d8$$constant); 2503 %} 2504
// Copy one XMM double onto the x87 stack via a freshly reserved 8-byte stack temp (the temp stays allocated; see pop_stack_temp_qword).
2505 enc_class Push_SrcD(regD src) %{ 2506 MacroAssembler _masm(&cbuf); 2507 __ subptr(rsp, 8); 2508 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2509 __ fld_d(Address(rsp, 0)); 2510 %} 2511
// Reserve / release an 8-byte stack temporary (used as a matched pair around x87 sequences).
2512 enc_class push_stack_temp_qword() %{ 2513 MacroAssembler _masm(&cbuf); 2514 __ subptr(rsp, 8); 2515 %} 2516 2517 enc_class pop_stack_temp_qword() %{ 2518 MacroAssembler _masm(&cbuf); 2519 __ addptr(rsp, 8); 2520 %} 2521
// Store src into the already-reserved stack temp and load it onto the x87 stack (caller must have pushed the qword temp first).
2522 enc_class push_xmm_to_fpr1(regD src) %{ 2523 MacroAssembler _masm(&cbuf); 2524 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2525 __ fld_d(Address(rsp, 0)); 2526 %} 2527
// Rotate src into FPR1 if needed; the result pop itself is emitted by a following Pop_Reg_F / Pop_Mem_F encoding (see the commented-out FSTP below).
2528 enc_class Push_Result_Mod_DPR( regDPR src) %{ 2529 if ($src$$reg != FPR1L_enc) { 2530 // fincstp 2531 emit_opcode (cbuf, 0xD9); 2532 emit_opcode (cbuf, 0xF7); 2533 // FXCH FPR1 with src 2534 emit_opcode(cbuf, 0xD9); 2535 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2536 // fdecstp 2537 emit_opcode (cbuf, 0xD9); 2538 emit_opcode (cbuf, 0xF6); 2539 } 2540 // // following asm replaced with Pop_Reg_F or Pop_Mem_F 2541 // // FSTP FPR$dst$$reg 2542 // emit_opcode( cbuf, 0xDD ); 2543 // emit_d8( cbuf, 0xD8+$dst$$reg ); 2544 %} 2545
// Copy the FPU status word into EFLAGS (FNSTSW AX / SAHF) and skip the next 5 bytes when PF is clear, i.e. when the compare was ordered.
2546 enc_class fnstsw_sahf_skip_parity() %{ 2547 // fnstsw ax 2548 emit_opcode( cbuf, 0xDF ); 2549 emit_opcode( cbuf, 0xE0 ); 2550 // sahf 2551 emit_opcode( cbuf, 0x9E ); 2552 // jnp ::skip 2553 emit_opcode( cbuf, 0x7B ); 2554 emit_opcode( cbuf, 0x05 ); 2555 %} 2556
// FPREM loop: after FNSTSW/SAHF, PF holds FPU condition bit C2 (reduction incomplete), so JP repeats FPREM until the partial remainder converges.
2557 enc_class emitModDPR() %{ 2558 // fprem must be iterative 2559 // :: loop 2560 // fprem 2561 emit_opcode( cbuf, 0xD9 ); 2562 emit_opcode( cbuf, 0xF8 ); 2563 // wait 2564 emit_opcode( cbuf, 0x9b ); 2565 // fnstsw ax 2566 emit_opcode( cbuf, 0xDF ); 2567 emit_opcode( cbuf, 0xE0 ); 2568 // sahf 2569 emit_opcode( cbuf, 0x9E ); 2570 // jp ::loop 2571 emit_opcode( cbuf, 0x0F ); 2572 emit_opcode( cbuf, 0x8A ); 2573 emit_opcode( cbuf, 0xF4 ); 2574 emit_opcode( cbuf, 0xFF ); 2575 emit_opcode( cbuf, 0xFF ); 2576 emit_opcode( cbuf, 0xFF ); 2577 %} 2578 2579 enc_class fpu_flags() %{ 2580 // fnstsw_ax 2581 emit_opcode( cbuf, 0xDF); 2582 emit_opcode( cbuf, 0xE0); 2583 // test ax,0x0400 2584 emit_opcode( cbuf,
0x66 ); // operand-size prefix for 16-bit immediate 2585 emit_opcode( cbuf, 0xA9 ); 2586 emit_d16 ( cbuf, 0x0400 ); 2587 // // // This sequence works, but stalls for 12-16 cycles on PPro 2588 // // test rax,0x0400 2589 // emit_opcode( cbuf, 0xA9 ); 2590 // emit_d32 ( cbuf, 0x00000400 ); 2591 // 2592 // jz exit (no unordered comparison) 2593 emit_opcode( cbuf, 0x74 ); 2594 emit_d8 ( cbuf, 0x02 ); 2595 // mov ah,1 - treat as LT case (set carry flag) 2596 emit_opcode( cbuf, 0xB4 ); 2597 emit_d8 ( cbuf, 0x01 ); 2598 // sahf 2599 emit_opcode( cbuf, 0x9E); 2600 %} 2601 2602 enc_class cmpF_P6_fixup() %{ 2603 // Fixup the integer flags in case comparison involved a NaN 2604 // 2605 // JNP exit (no unordered comparison, P-flag is set by NaN) 2606 emit_opcode( cbuf, 0x7B ); 2607 emit_d8 ( cbuf, 0x03 ); 2608 // MOV AH,1 - treat as LT case (set carry flag) 2609 emit_opcode( cbuf, 0xB4 ); 2610 emit_d8 ( cbuf, 0x01 ); 2611 // SAHF 2612 emit_opcode( cbuf, 0x9E); 2613 // NOP // target for branch to avoid branch to branch 2614 emit_opcode( cbuf, 0x90); 2615 %} 2616 2617 // fnstsw_ax(); 2618 // sahf(); 2619 // movl(dst, nan_result); 2620 // jcc(Assembler::parity, exit); 2621 // movl(dst, less_result); 2622 // jcc(Assembler::below, exit); 2623 // movl(dst, equal_result); 2624 // jcc(Assembler::equal, exit); 2625 // movl(dst, greater_result); 2626 2627 // less_result = 1; 2628 // greater_result = -1; 2629 // equal_result = 0; 2630 // nan_result = -1; 2631
// NOTE(review): the sketch above lists less_result=1 / greater_result=-1, but the bytes emitted below load -1 for the 'below' (less) case and +1 on the fall-through (greater) case — the sketch's constants appear stale; confirm before relying on them.
2632 enc_class CmpF_Result(rRegI dst) %{ 2633 // fnstsw_ax(); 2634 emit_opcode( cbuf, 0xDF); 2635 emit_opcode( cbuf, 0xE0); 2636 // sahf 2637 emit_opcode( cbuf, 0x9E); 2638 // movl(dst, nan_result); 2639 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2640 emit_d32( cbuf, -1 ); 2641 // jcc(Assembler::parity, exit); 2642 emit_opcode( cbuf, 0x7A ); 2643 emit_d8 ( cbuf, 0x13 ); 2644 // movl(dst, less_result); 2645 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2646 emit_d32( cbuf, -1 ); 2647 // jcc(Assembler::below, exit); 2648 emit_opcode( cbuf, 0x72 ); 2649 emit_d8 ( cbuf, 0x0C ); 2650 // movl(dst, equal_result); 2651 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2652 emit_d32( cbuf, 0 ); 2653 // jcc(Assembler::equal, exit); 2654 emit_opcode( cbuf, 0x74 ); 2655 emit_d8 ( cbuf, 0x05 ); 2656 // movl(dst, greater_result); 2657 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2658 emit_d32( cbuf, 1 ); 2659 %} 2660 2661 2662 // Compare the longs and set flags 2663 // BROKEN! Do Not use as-is 2664 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2665 // CMP $src1.hi,$src2.hi 2666 emit_opcode( cbuf, 0x3B ); 2667 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) ); 2668 // JNE,s done 2669 emit_opcode(cbuf,0x75); 2670 emit_d8(cbuf, 2 ); 2671 // CMP $src1.lo,$src2.lo 2672 emit_opcode( cbuf, 0x3B ); 2673 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2674 // done: 2675 %} 2676
// Sign-extend a 32-bit int into a long register pair: dst.lo = src, dst.hi = src >> 31.
2677 enc_class convert_int_long( regL dst, rRegI src ) %{ 2678 // mov $dst.lo,$src 2679 int dst_encoding = $dst$$reg; 2680 int src_encoding = $src$$reg; 2681 encode_Copy( cbuf, dst_encoding , src_encoding ); 2682 // mov $dst.hi,$src 2683 encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding ); 2684 // sar $dst.hi,31 2685 emit_opcode( cbuf, 0xC1 ); 2686 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) ); 2687 emit_d8(cbuf, 0x1F ); 2688 %} 2689
// Push the long pair and FILD it as a 64-bit integer onto the x87 stack, then pop the two pushed words back off the CPU stack (ADD ESP,8).
2690 enc_class convert_long_double( eRegL src ) %{ 2691 // push $src.hi 2692 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg)); 2693 // push $src.lo 2694 emit_opcode(cbuf, 0x50+$src$$reg ); 2695 // fild 64-bits at [SP] 2696 emit_opcode(cbuf,0xdf); 2697 emit_d8(cbuf, 0x6C); 2698 emit_d8(cbuf, 0x24); 2699 emit_d8(cbuf, 0x00); 2700 // pop stack 2701 emit_opcode(cbuf, 0x83); // add SP, #8 2702 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2703 emit_d8(cbuf, 0x8); 2704 %} 2705
// IMUL producing EDX:EAX, then arithmetic-shift the high word (EDX) right by cnt-32 to extract the wanted bits.
2706 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2707 // IMUL EDX:EAX,$src1 2708 emit_opcode( cbuf, 0xF7 ); 2709 emit_rm( cbuf, 0x3, 0x5,
$src1$$reg ); 2710 // SAR EDX,$cnt-32 2711 int shift_count = ((int)$cnt$$constant) - 32; 2712 if (shift_count > 0) { 2713 emit_opcode(cbuf, 0xC1); 2714 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2715 emit_d8(cbuf, shift_count); 2716 } 2717 %} 2718 2719 // this version doesn't have add sp, 8 2720 enc_class convert_long_double2( eRegL src ) %{ 2721 // push $src.hi 2722 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg)); 2723 // push $src.lo 2724 emit_opcode(cbuf, 0x50+$src$$reg ); 2725 // fild 64-bits at [SP] 2726 emit_opcode(cbuf,0xdf); 2727 emit_d8(cbuf, 0x6C); 2728 emit_d8(cbuf, 0x24); 2729 emit_d8(cbuf, 0x00); 2730 %} 2731 2732 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2733 // Basic idea: long = (long)int * (long)int 2734 // IMUL EDX:EAX, src 2735 emit_opcode( cbuf, 0xF7 ); 2736 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2737 %} 2738 2739 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2740 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2741 // MUL EDX:EAX, src 2742 emit_opcode( cbuf, 0xF7 ); 2743 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2744 %} 2745 2746 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2747 // Basic idea: lo(result) = lo(x_lo * y_lo) 2748 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2749 // MOV $tmp,$src.lo 2750 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2751 // IMUL $tmp,EDX 2752 emit_opcode( cbuf, 0x0F ); 2753 emit_opcode( cbuf, 0xAF ); 2754 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2755 // MOV EDX,$src.hi 2756 encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) ); 2757 // IMUL EDX,EAX 2758 emit_opcode( cbuf, 0x0F ); 2759 emit_opcode( cbuf, 0xAF ); 2760 emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg ); 2761 // ADD $tmp,EDX 2762 emit_opcode( cbuf, 0x03 ); 2763 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2764 // MUL EDX:EAX,$src.lo 2765 emit_opcode( cbuf, 0xF7 ); 2766 emit_rm( cbuf, 0x3, 0x4,
$src$$reg ); 2767 // ADD EDX,$tmp
2768 emit_opcode( cbuf, 0x03 ); 2769 emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg ); 2770 %} 2771
2772 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2773 // Basic idea: lo(result) = lo(src * y_lo) 2774 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2775 // IMUL $tmp,EDX,$src 2776 emit_opcode( cbuf, 0x6B ); 2777 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2778 emit_d8( cbuf, (int)$src$$constant ); 2779 // MOV EDX,$src 2780 emit_opcode(cbuf, 0xB8 + EDX_enc); 2781 emit_d32( cbuf, (int)$src$$constant ); 2782 // MUL EDX:EAX,EDX 2783 emit_opcode( cbuf, 0xF7 ); 2784 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2785 // ADD EDX,$tmp
2786 emit_opcode( cbuf, 0x03 ); 2787 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2788 %} 2789
// 64-bit divide: push both longs and call SharedRuntime::ldiv, then pop the four argument words.
// NOTE(review): HIGH_FROM_LOW_ENC is applied to (0x50+reg); this is only equivalent to 0x50+HIGH_FROM_LOW_ENC(reg) if the macro is a plain constant offset on the register number — confirm against its definition.
2790 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2791 // PUSH src1.hi 2792 emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) ); 2793 // PUSH src1.lo 2794 emit_opcode(cbuf, 0x50+$src1$$reg ); 2795 // PUSH src2.hi 2796 emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) ); 2797 // PUSH src2.lo 2798 emit_opcode(cbuf, 0x50+$src2$$reg ); 2799 // CALL directly to the runtime 2800 MacroAssembler _masm(&cbuf); 2801 cbuf.set_insts_mark(); 2802 emit_opcode(cbuf,0xE8); // Call into runtime 2803 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2804 __ post_call_nop(); 2805 // Restore stack 2806 emit_opcode(cbuf, 0x83); // add SP, #framesize 2807 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2808 emit_d8(cbuf, 4*4); 2809 %} 2810
// 64-bit remainder: identical shape to long_div but calls SharedRuntime::lrem.
2811 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2812 // PUSH src1.hi 2813 emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) ); 2814 // PUSH src1.lo 2815 emit_opcode(cbuf, 0x50+$src1$$reg ); 2816 // PUSH src2.hi 2817 emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) ); 2818 // PUSH src2.lo 2819 emit_opcode(cbuf, 0x50+$src2$$reg ); 2820 // CALL directly to the
runtime 2821 MacroAssembler _masm(&cbuf); 2822 cbuf.set_insts_mark(); 2823 emit_opcode(cbuf,0xE8); // Call into runtime 2824 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2825 __ post_call_nop(); 2826 // Restore stack 2827 emit_opcode(cbuf, 0x83); // add SP, #framesize 2828 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2829 emit_d8(cbuf, 4*4); 2830 %} 2831
// Test a long for zero: OR the low and high halves into tmp so ZF is set iff the whole 64-bit value is zero.
2832 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2833 // MOV $tmp,$src.lo 2834 emit_opcode(cbuf, 0x8B); 2835 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2836 // OR $tmp,$src.hi 2837 emit_opcode(cbuf, 0x0B); 2838 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg)); 2839 %} 2840
// Long equality compare: compare the low words and only fall through to the high-word compare when the lows were equal.
2841 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2842 // CMP $src1.lo,$src2.lo 2843 emit_opcode( cbuf, 0x3B ); 2844 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2845 // JNE,s skip 2846 emit_cc(cbuf, 0x70, 0x5); 2847 emit_d8(cbuf,2); 2848 // CMP $src1.hi,$src2.hi 2849 emit_opcode( cbuf, 0x3B ); 2850 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) ); 2851 %} 2852
// Signed long compare: CMP the low words, then SBB the high words through tmp, leaving flags as a full 64-bit subtract would.
2853 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2854 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2855 emit_opcode( cbuf, 0x3B ); 2856 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2857 // MOV $tmp,$src1.hi 2858 emit_opcode( cbuf, 0x8B ); 2859 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) ); 2860 // SBB $tmp,$src2.hi\t!
Compute flags for long compare 2861 emit_opcode( cbuf, 0x1B ); 2862 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) ); 2863 %} 2864
// Compare a long against zero by computing 0 - src with CMP/SBB on a zeroed tmp.
2865 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2866 // XOR $tmp,$tmp 2867 emit_opcode(cbuf,0x33); // XOR 2868 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2869 // CMP $tmp,$src.lo 2870 emit_opcode( cbuf, 0x3B ); 2871 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2872 // SBB $tmp,$src.hi 2873 emit_opcode( cbuf, 0x1B ); 2874 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) ); 2875 %} 2876 2877 // Sniff, sniff... smells like Gnu Superoptimizer
// Two's-complement negate of a long pair: NEG hi; NEG lo; SBB hi,0 applies the borrow when lo was non-zero.
2878 enc_class neg_long( eRegL dst ) %{ 2879 emit_opcode(cbuf,0xF7); // NEG hi 2880 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg)); 2881 emit_opcode(cbuf,0xF7); // NEG lo 2882 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2883 emit_opcode(cbuf,0x83); // SBB hi,0 2884 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg)); 2885 emit_d8 (cbuf,0 ); 2886 %} 2887
2888 enc_class enc_pop_rdx() %{ 2889 emit_opcode(cbuf,0x5A); // 0x5A = POP EDX
2890 %} 2891 2892 enc_class enc_rethrow() %{ 2893 MacroAssembler _masm(&cbuf); 2894 cbuf.set_insts_mark(); 2895 emit_opcode(cbuf, 0xE9); // jmp entry 2896 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2897 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2898 __ post_call_nop(); 2899 %} 2900 2901 2902 // Convert a double to an int. Java semantics require we do complex 2903 // manglelations in the corner cases. So we set the rounding mode to 2904 // 'zero', store the darned double down as an int, and reset the 2905 // rounding mode to 'nearest'. The hardware throws an exception which 2906 // patches up the correct value directly to the stack. 2907 enc_class DPR2I_encoding( regDPR src ) %{ 2908 // Flip to round-to-zero mode. We attempted to allow invalid-op 2909 // exceptions here, so that a NAN or other corner-case value will 2910 // thrown an exception (but normal values get converted at full speed).
2911 // However, I2C adapters and other float-stack manglers leave pending 2912 // invalid-op exceptions hanging. We would have to clear them before 2913 // enabling them and that is more expensive than just testing for the 2914 // invalid value Intel stores down in the corner cases. 2915 emit_opcode(cbuf,0xD9); // FLDCW trunc 2916 emit_opcode(cbuf,0x2D); 2917 emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()); 2918 // Allocate a word 2919 emit_opcode(cbuf,0x83); // SUB ESP,4 2920 emit_opcode(cbuf,0xEC); 2921 emit_d8(cbuf,0x04); 2922 // Encoding assumes a double has been pushed into FPR0. 2923 // Store down the double as an int, popping the FPU stack 2924 emit_opcode(cbuf,0xDB); // FISTP [ESP] 2925 emit_opcode(cbuf,0x1C); 2926 emit_d8(cbuf,0x24); 2927 // Restore the rounding mode; mask the exception 2928 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2929 emit_opcode(cbuf,0x2D); 2930 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2931 ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24() 2932 : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std()); 2933 2934 // Load the converted int; adjust CPU stack 2935 emit_opcode(cbuf,0x58); // POP EAX 2936 emit_opcode(cbuf,0x3D); // CMP EAX,imm 2937 emit_d32 (cbuf,0x80000000); // 0x80000000 2938 emit_opcode(cbuf,0x75); // JNE around_slow_call 2939 emit_d8 (cbuf,0x07); // Size of slow_call 2940 // Push src onto stack slow-path 2941 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2942 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2943 // CALL directly to the runtime 2944 MacroAssembler _masm(&cbuf); 2945 cbuf.set_insts_mark(); 2946 emit_opcode(cbuf,0xE8); // Call into runtime 2947 emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2948 __ post_call_nop(); 2949 // Carry on here...
2950 %} 2951
// Double-to-long: same trick as DPR2I_encoding but stores a 64-bit FISTP and checks EDX:EAX for the 0x8000000000000000 sentinel before taking the slow call to d2l_wrapper.
2952 enc_class DPR2L_encoding( regDPR src ) %{ 2953 emit_opcode(cbuf,0xD9); // FLDCW trunc 2954 emit_opcode(cbuf,0x2D); 2955 emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()); 2956 // Allocate a word 2957 emit_opcode(cbuf,0x83); // SUB ESP,8 2958 emit_opcode(cbuf,0xEC); 2959 emit_d8(cbuf,0x08); 2960 // Encoding assumes a double has been pushed into FPR0. 2961 // Store down the double as a long, popping the FPU stack 2962 emit_opcode(cbuf,0xDF); // FISTP [ESP] 2963 emit_opcode(cbuf,0x3C); 2964 emit_d8(cbuf,0x24); 2965 // Restore the rounding mode; mask the exception 2966 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2967 emit_opcode(cbuf,0x2D); 2968 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2969 ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24() 2970 : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std()); 2971
2972 // Load the converted long; adjust CPU stack
2973 emit_opcode(cbuf,0x58); // POP EAX 2974 emit_opcode(cbuf,0x5A); // POP EDX 2975 emit_opcode(cbuf,0x81); // CMP EDX,imm 2976 emit_d8 (cbuf,0xFA); // rdx 2977 emit_d32 (cbuf,0x80000000); // 0x80000000 2978 emit_opcode(cbuf,0x75); // JNE around_slow_call 2979 emit_d8 (cbuf,0x07+4); // Size of slow_call 2980 emit_opcode(cbuf,0x85); // TEST EAX,EAX 2981 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 2982 emit_opcode(cbuf,0x75); // JNE around_slow_call 2983 emit_d8 (cbuf,0x07); // Size of slow_call 2984 // Push src onto stack slow-path 2985 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2986 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2987 // CALL directly to the runtime 2988 MacroAssembler _masm(&cbuf); 2989 cbuf.set_insts_mark(); 2990 emit_opcode(cbuf,0xE8); // Call into runtime 2991 emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2992 __ post_call_nop(); 2993 // Carry on here...
2994 %} 2995 2996 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 2997 // Operand was loaded from memory into fp ST (stack top) 2998 // FMUL ST,$src /* D8 C8+i */ 2999 emit_opcode(cbuf, 0xD8); 3000 emit_opcode(cbuf, 0xC8 + $src1$$reg); 3001 %} 3002
3003 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 3004 // FADD ST,src2 /* D8 C0+i */ (non-popping form)
3005 emit_opcode(cbuf, 0xD8); 3006 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3007 //could use FADDP src2,fpST /* DE C0+i */ 3008 %} 3009 3010 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 3011 // FADDP src2,ST /* DE C0+i */ 3012 emit_opcode(cbuf, 0xDE); 3013 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3014 %} 3015
3016 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 3017 // Operand has been loaded into fp ST (stack top) 3018 // FSUB ST,$src1 3019 emit_opcode(cbuf, 0xD8); 3020 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3021
3022 // FDIV ST,$src2 /* D8 F0+i */
3023 emit_opcode(cbuf, 0xD8); 3024 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3025 %} 3026
3027 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3028 // Operand was loaded from memory into fp ST (stack top) 3029 // FADD ST,$src /* D8 C0+i */ 3030 emit_opcode(cbuf, 0xD8); 3031 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3032
3033 // FMUL ST,src2 /* D8 C8+i */
3034 emit_opcode(cbuf, 0xD8); 3035 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3036 %} 3037 3038 3039 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3040 // Operand was loaded from memory into fp ST (stack top) 3041 // FADD ST,$src /* D8 C0+i */ 3042 emit_opcode(cbuf, 0xD8); 3043 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3044 3045 // FMULP src2,ST /* DE C8+i */ 3046 emit_opcode(cbuf, 0xDE); 3047 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3048 %} 3049
3050 // Atomically load the volatile long
3051 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3052 emit_opcode(cbuf,0xDF); // DF opcode byte; the /5 reg field below makes this FILD m64 from mem
3053 int rm_byte_opcode = 0x05; 3054 int base = $mem$$base; 3055 int index = $mem$$index; 3056 int scale = $mem$$scale; 3057 int displace = $mem$$disp; 3058 relocInfo::relocType disp_reloc =
$mem->disp_reloc(); // disp-as-oop when working with static globals
3059 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3060 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); // DF /7: FISTP the loaded long into the dst stack slot
3061 %} 3062 3063 // Volatile Store Long. Must be atomic, so move it into 3064 // the FP TOS and then do a 64-bit FIST. Has to probe the 3065 // target address before the store (for null-ptr checks) 3066 // so the memory operand is used twice in the encoding.
3067 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3068 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); // DF /5: FILD the long from the src stack slot
3069 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop
3070 emit_opcode(cbuf,0xDF); // DF with the /7 reg field below: FISTP m64 to the target address
3071 int rm_byte_opcode = 0x07; 3072 int base = $mem$$base; 3073 int index = $mem$$index; 3074 int scale = $mem$$scale; 3075 int displace = $mem$$disp; 3076 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3077 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3078 %} 3079 3080 %} 3081 3082 3083 //----------FRAME-------------------------------------------------------------- 3084 // Definition of frame structure and management information. 3085 // 3086 // S T A C K L A Y O U T Allocators stack-slot number 3087 // | (to get allocators register number 3088 // G Owned by | | v add OptoReg::stack0()) 3089 // r CALLER | | 3090 // o | +--------+ pad to even-align allocators stack-slot 3091 // w V | pad0 | numbers; owned by CALLER 3092 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned 3093 // h ^ | in | 5 3094 // | | args | 4 Holes in incoming args owned by SELF 3095 // | | | | 3 3096 // | | +--------+ 3097 // V | | old out| Empty on Intel, window on Sparc 3098 // | old |preserve| Must be even aligned. 3099 // | SP-+--------+----> Matcher::_old_SP, even aligned 3100 // | | in | 3 area for Intel ret address 3101 // Owned by |preserve| Empty on Sparc.
3102 // SELF +--------+ 3103 // | | pad2 | 2 pad to align old SP 3104 // | +--------+ 1 3105 // | | locks | 0 3106 // | +--------+----> OptoReg::stack0(), even aligned 3107 // | | pad1 | 11 pad to align new SP 3108 // | +--------+ 3109 // | | | 10 3110 // | | spills | 9 spills 3111 // V | | 8 (pad0 slot for callee) 3112 // -----------+--------+----> Matcher::_out_arg_limit, unaligned 3113 // ^ | out | 7 3114 // | | args | 6 Holes in outgoing args owned by CALLEE 3115 // Owned by +--------+ 3116 // CALLEE | new out| 6 Empty on Intel, window on Sparc 3117 // | new |preserve| Must be even-aligned. 3118 // | SP-+--------+----> Matcher::_new_SP, even aligned 3119 // | | | 3120 // 3121 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is 3122 // known from SELF's arguments and the Java calling convention. 3123 // Region 6-7 is determined per call site. 3124 // Note 2: If the calling convention leaves holes in the incoming argument 3125 // area, those holes are owned by SELF. Holes in the outgoing area 3126 // are owned by the CALLEE. Holes should not be necessary in the 3127 // incoming area, as the Java calling convention is completely under 3128 // the control of the AD file. Doubles can be sorted and packed to 3129 // avoid holes. Holes in the outgoing arguments may be necessary for 3130 // varargs C calling conventions. 3131 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is 3132 // even aligned with pad0 as needed. 3133 // Region 6 is even aligned. Region 6-7 is NOT even aligned; 3134 // region 6-11 is even aligned; it may be padded out more so that 3135 // the region from SP to FP meets the minimum stack alignment. 3136 3137 frame %{ 3138 // These three registers define part of the calling convention 3139 // between compiled code and the interpreter.
3140 inline_cache_reg(EAX); // Inline Cache Register
3141 3142 // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] 3143 cisc_spilling_operand_name(indOffset32); 3144 3145 // Number of stack slots consumed by locking an object 3146 sync_stack_slots(1); 3147 3148 // Compiled code's Frame Pointer 3149 frame_pointer(ESP); 3150 // Interpreter stores its frame pointer in a register which is 3151 // stored to the stack by I2CAdaptors. 3152 // I2CAdaptors convert from interpreted java to compiled java. 3153 interpreter_frame_pointer(EBP); 3154 3155 // Stack alignment requirement 3156 // Alignment size in bytes (128-bit -> 16 bytes) 3157 stack_alignment(StackAlignmentInBytes); 3158 3159 // Number of outgoing stack slots killed above the out_preserve_stack_slots 3160 // for calls to C. Supports the var-args backing area for register parms. 3161 varargs_C_out_slots_killed(0); 3162 3163 // The after-PROLOG location of the return address. Location of 3164 // return address specifies a type (REG or STACK) and a number 3165 // representing the register number (i.e. - use a register name) or 3166 // stack slot. 3167 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3168 // Otherwise, it is above the locks and verification slot and alignment word 3169 return_addr(STACK - 1 + 3170 align_up((Compile::current()->in_preserve_stack_slots() + 3171 Compile::current()->fixed_slots()), 3172 stack_alignment_in_slots())); 3173 3174 // Location of C & interpreter return values 3175 c_return_value %{ 3176 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3177 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3178 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3179 3180 // in SSE2+ mode we want to keep the FPU stack clean so pretend 3181 // that C functions return float and double results in XMM0. 3182 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3183 return OptoRegPair(XMM0b_num,XMM0_num); 3184 if( ideal_reg == Op_RegF && UseSSE>=2 ) 3185 return OptoRegPair(OptoReg::Bad,XMM0_num); 3186 3187 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3188 %} 3189 3190 // Location of return values
// Java return values: same table as c_return_value, but note the predicates differ — floats use XMM0 already from UseSSE>=1, while doubles still require UseSSE>=2.
3191 return_value %{ 3192 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3193 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3194 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3195 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3196 return OptoRegPair(XMM0b_num,XMM0_num); 3197 if( ideal_reg == Op_RegF && UseSSE>=1 ) 3198 return OptoRegPair(OptoReg::Bad,XMM0_num); 3199 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3200 %} 3201 3202 %} 3203 3204 //----------ATTRIBUTES--------------------------------------------------------- 3205 //----------Operand Attributes------------------------------------------------- 3206 op_attrib op_cost(0); // Required cost attribute
3207 3208 //----------Instruction Attributes--------------------------------------------- 3209 ins_attrib
ins_cost(100); // Required cost attribute
3210 ins_attrib ins_size(8); // Required size attribute (in bits) 3211 ins_attrib ins_short_branch(0); // Required flag: is this instruction a 3212 // non-matching short branch variant of some 3213 // long branch? 3214 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2) 3215 // specifies the alignment that some part of the instruction (not 3216 // necessarily the start) requires. If > 1, a compute_padding() 3217 // function must be provided for the instruction 3218 3219 //----------OPERANDS----------------------------------------------------------- 3220 // Operand definitions must precede instruction definitions for correct parsing 3221 // in the ADLC because operands constitute user defined types which are used in 3222 // instruction definitions. 3223 3224 //----------Simple Operands---------------------------------------------------- 3225 // Immediate Operands 3226 // Integer Immediate 3227 operand immI() %{ 3228 match(ConI); 3229 3230 op_cost(10); 3231 format %{ %} 3232 interface(CONST_INTER); 3233 %} 3234 3235 // Constant for test vs zero 3236 operand immI_0() %{ 3237 predicate(n->get_int() == 0); 3238 match(ConI); 3239 3240 op_cost(0); 3241 format %{ %} 3242 interface(CONST_INTER); 3243 %} 3244 3245 // Constant for increment 3246 operand immI_1() %{ 3247 predicate(n->get_int() == 1); 3248 match(ConI); 3249 3250 op_cost(0); 3251 format %{ %} 3252 interface(CONST_INTER); 3253 %} 3254 3255 // Constant for decrement 3256 operand immI_M1() %{ 3257 predicate(n->get_int() == -1); 3258 match(ConI); 3259 3260 op_cost(0); 3261 format %{ %} 3262 interface(CONST_INTER); 3263 %} 3264 3265 // Valid scale values for addressing modes 3266 operand immI2() %{ 3267 predicate(0 <= n->get_int() && (n->get_int() <= 3)); 3268 match(ConI); 3269 3270 format %{ %} 3271 interface(CONST_INTER); 3272 %} 3273
// Signed 8-bit immediate: fits the sign-extended imm8 instruction forms.
3274 operand immI8() %{ 3275 predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); 3276 match(ConI); 3277 3278 op_cost(5); 3279 format %{ %} 3280 interface(CONST_INTER); 3281 %} 3282
// Unsigned 8-bit immediate: 0..255.
3283 operand immU8() %{ 3284 predicate((0 <= n->get_int()) && (n->get_int() <= 255)); 3285 match(ConI); 3286 3287 op_cost(5); 3288 format %{ %} 3289 interface(CONST_INTER); 3290 %} 3291 3292 operand immI16() %{ 3293 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); 3294 match(ConI); 3295 3296 op_cost(10); 3297 format %{ %} 3298 interface(CONST_INTER); 3299 %} 3300 3301 // Int Immediate non-negative 3302 operand immU31() 3303 %{ 3304 predicate(n->get_int() >= 0); 3305 match(ConI); 3306 3307 op_cost(0); 3308 format %{ %} 3309 interface(CONST_INTER); 3310 %} 3311 3312 // Constant for long shifts 3313 operand immI_32() %{ 3314 predicate( n->get_int() == 32 ); 3315 match(ConI); 3316 3317 op_cost(0); 3318 format %{ %} 3319 interface(CONST_INTER); 3320 %} 3321 3322 operand immI_1_31() %{ 3323 predicate( n->get_int() >= 1 && n->get_int() <= 31 ); 3324 match(ConI); 3325 3326 op_cost(0); 3327 format %{ %} 3328 interface(CONST_INTER); 3329 %} 3330 3331 operand immI_32_63() %{ 3332 predicate( n->get_int() >= 32 && n->get_int() <= 63 ); 3333 match(ConI); 3334 op_cost(0); 3335 3336 format %{ %} 3337 interface(CONST_INTER); 3338 %} 3339 3340 operand immI_2() %{ 3341 predicate( n->get_int() == 2 ); 3342 match(ConI); 3343 3344 op_cost(0); 3345 format %{ %} 3346 interface(CONST_INTER); 3347 %} 3348 3349 operand immI_3() %{ 3350 predicate( n->get_int() == 3 ); 3351 match(ConI); 3352 3353 op_cost(0); 3354 format %{ %} 3355 interface(CONST_INTER); 3356 %} 3357 3358 operand immI_4() 3359 %{ 3360 predicate(n->get_int() == 4); 3361 match(ConI); 3362 3363 op_cost(0); 3364 format %{ %} 3365 interface(CONST_INTER); 3366 %} 3367 3368 operand immI_8() 3369 %{ 3370 predicate(n->get_int() == 8); 3371 match(ConI); 3372 3373 op_cost(0); 3374 format %{ %} 3375 interface(CONST_INTER); 3376 %} 3377 3378 // Pointer Immediate 3379 operand immP() %{ 3380 match(ConP); 3381 3382 op_cost(10); 3383 format %{ %}
3384 interface(CONST_INTER); 3385 %} 3386 3387 // Null Pointer Immediate 3388 operand immP0() %{ 3389 predicate( n->get_ptr() == 0 ); 3390 match(ConP); 3391 op_cost(0); 3392 3393 format %{ %} 3394 interface(CONST_INTER); 3395 %} 3396 3397 // Long Immediate 3398 operand immL() %{ 3399 match(ConL); 3400 3401 op_cost(20); 3402 format %{ %} 3403 interface(CONST_INTER); 3404 %} 3405 3406 // Long Immediate zero 3407 operand immL0() %{ 3408 predicate( n->get_long() == 0L ); 3409 match(ConL); 3410 op_cost(0); 3411 3412 format %{ %} 3413 interface(CONST_INTER); 3414 %} 3415
3416 // Long Immediate zero
3417 operand immL_M1() %{ 3418 predicate( n->get_long() == -1L ); 3419 match(ConL); 3420 op_cost(0); 3421 3422 format %{ %} 3423 interface(CONST_INTER); 3424 %} 3425 3426 // Long immediate from 0 to 127. 3427 // Used for a shorter form of long mul by 10. 3428 operand immL_127() %{ 3429 predicate((0 <= n->get_long()) && (n->get_long() <= 127)); 3430 match(ConL); 3431 op_cost(0); 3432 3433 format %{ %} 3434 interface(CONST_INTER); 3435 %} 3436 3437 // Long Immediate: low 32-bit mask 3438 operand immL_32bits() %{ 3439 predicate(n->get_long() == 0xFFFFFFFFL); 3440 match(ConL); 3441 op_cost(0); 3442 3443 format %{ %} 3444 interface(CONST_INTER); 3445 %} 3446 3447 // Long Immediate: low 32-bit mask 3448 operand immL32() %{ 3449 predicate(n->get_long() == (int)(n->get_long())); 3450 match(ConL); 3451 op_cost(20); 3452 3453 format %{ %} 3454 interface(CONST_INTER); 3455 %} 3456 3457 //Double Immediate zero 3458 operand immDPR0() %{ 3459 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3460 // bug that generates code such that NaNs compare equal to 0.0 3461 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); 3462 match(ConD); 3463 3464 op_cost(5); 3465 format %{ %} 3466 interface(CONST_INTER); 3467 %} 3468 3469 // Double Immediate one 3470 operand immDPR1() %{ 3471 predicate( UseSSE<=1 && n->getd() == 1.0 ); 3472 match(ConD); 3473 3474
op_cost(5); 3475 format %{ %} 3476 interface(CONST_INTER); 3477 %} 3478 3479 // Double Immediate 3480 operand immDPR() %{ 3481 predicate(UseSSE<=1); 3482 match(ConD); 3483 3484 op_cost(5); 3485 format %{ %} 3486 interface(CONST_INTER); 3487 %} 3488 3489 operand immD() %{ 3490 predicate(UseSSE>=2); 3491 match(ConD); 3492 3493 op_cost(5); 3494 format %{ %} 3495 interface(CONST_INTER); 3496 %} 3497 3498 // Double Immediate zero 3499 operand immD0() %{ 3500 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3501 // bug that generates code such that NaNs compare equal to 0.0 AND do not 3502 // compare equal to -0.0. 3503 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); 3504 match(ConD); 3505 3506 format %{ %} 3507 interface(CONST_INTER); 3508 %} 3509 3510 // Float Immediate zero 3511 operand immFPR0() %{ 3512 predicate(UseSSE == 0 && n->getf() == 0.0F); 3513 match(ConF); 3514 3515 op_cost(5); 3516 format %{ %} 3517 interface(CONST_INTER); 3518 %} 3519 3520 // Float Immediate one 3521 operand immFPR1() %{ 3522 predicate(UseSSE == 0 && n->getf() == 1.0F); 3523 match(ConF); 3524 3525 op_cost(5); 3526 format %{ %} 3527 interface(CONST_INTER); 3528 %} 3529 3530 // Float Immediate 3531 operand immFPR() %{ 3532 predicate( UseSSE == 0 ); 3533 match(ConF); 3534 3535 op_cost(5); 3536 format %{ %} 3537 interface(CONST_INTER); 3538 %} 3539 3540 // Float Immediate 3541 operand immF() %{ 3542 predicate(UseSSE >= 1); 3543 match(ConF); 3544 3545 op_cost(5); 3546 format %{ %} 3547 interface(CONST_INTER); 3548 %} 3549 3550 // Float Immediate zero.
Zero and not -0.0 3551 operand immF0() %{ 3552 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); 3553 match(ConF); 3554 3555 op_cost(5); 3556 format %{ %} 3557 interface(CONST_INTER); 3558 %} 3559 3560 // Immediates for special shifts (sign extend) 3561 3562 // Constants for increment 3563 operand immI_16() %{ 3564 predicate( n->get_int() == 16 ); 3565 match(ConI); 3566 3567 format %{ %} 3568 interface(CONST_INTER); 3569 %} 3570 3571 operand immI_24() %{ 3572 predicate( n->get_int() == 24 ); 3573 match(ConI); 3574 3575 format %{ %} 3576 interface(CONST_INTER); 3577 %} 3578 3579 // Constant for byte-wide masking 3580 operand immI_255() %{ 3581 predicate( n->get_int() == 255 ); 3582 match(ConI); 3583 3584 format %{ %} 3585 interface(CONST_INTER); 3586 %} 3587 3588 // Constant for short-wide masking 3589 operand immI_65535() %{ 3590 predicate(n->get_int() == 65535); 3591 match(ConI); 3592 3593 format %{ %} 3594 interface(CONST_INTER); 3595 %} 3596
// Vector mask register operand (RegVectMask) — presumably the x86 opmask (k) registers; confirm against the vectmask_reg register class definition.
3597 operand kReg() 3598 %{ 3599 constraint(ALLOC_IN_RC(vectmask_reg)); 3600 match(RegVectMask); 3601 format %{%} 3602 interface(REG_INTER); 3603 %} 3604 3605 // Register Operands 3606 // Integer Register 3607 operand rRegI() %{ 3608 constraint(ALLOC_IN_RC(int_reg)); 3609 match(RegI); 3610 match(xRegI); 3611 match(eAXRegI); 3612 match(eBXRegI); 3613 match(eCXRegI); 3614 match(eDXRegI); 3615 match(eDIRegI); 3616 match(eSIRegI); 3617 3618 format %{ %} 3619 interface(REG_INTER); 3620 %} 3621 3622 // Subset of Integer Register 3623 operand xRegI(rRegI reg) %{ 3624 constraint(ALLOC_IN_RC(int_x_reg)); 3625 match(reg); 3626 match(eAXRegI); 3627 match(eBXRegI); 3628 match(eCXRegI); 3629 match(eDXRegI); 3630 3631 format %{ %} 3632 interface(REG_INTER); 3633 %} 3634 3635 // Special Registers 3636 operand eAXRegI(xRegI reg) %{ 3637 constraint(ALLOC_IN_RC(eax_reg)); 3638 match(reg); 3639 match(rRegI); 3640 3641 format %{ "EAX" %} 3642 interface(REG_INTER); 3643 %} 3644 3645 // Special Registers 3646 operand eBXRegI(xRegI reg) %{
3647 constraint(ALLOC_IN_RC(ebx_reg)); 3648 match(reg); 3649 match(rRegI); 3650 3651 format %{ "EBX" %} 3652 interface(REG_INTER); 3653 %} 3654 3655 operand eCXRegI(xRegI reg) %{ 3656 constraint(ALLOC_IN_RC(ecx_reg)); 3657 match(reg); 3658 match(rRegI); 3659 3660 format %{ "ECX" %} 3661 interface(REG_INTER); 3662 %} 3663 3664 operand eDXRegI(xRegI reg) %{ 3665 constraint(ALLOC_IN_RC(edx_reg)); 3666 match(reg); 3667 match(rRegI); 3668 3669 format %{ "EDX" %} 3670 interface(REG_INTER); 3671 %} 3672 3673 operand eDIRegI(xRegI reg) %{ 3674 constraint(ALLOC_IN_RC(edi_reg)); 3675 match(reg); 3676 match(rRegI); 3677 3678 format %{ "EDI" %} 3679 interface(REG_INTER); 3680 %} 3681 3682 operand nadxRegI() %{ 3683 constraint(ALLOC_IN_RC(nadx_reg)); 3684 match(RegI); 3685 match(eBXRegI); 3686 match(eCXRegI); 3687 match(eSIRegI); 3688 match(eDIRegI); 3689 3690 format %{ %} 3691 interface(REG_INTER); 3692 %} 3693 3694 operand ncxRegI() %{ 3695 constraint(ALLOC_IN_RC(ncx_reg)); 3696 match(RegI); 3697 match(eAXRegI); 3698 match(eDXRegI); 3699 match(eSIRegI); 3700 match(eDIRegI); 3701 3702 format %{ %} 3703 interface(REG_INTER); 3704 %} 3705 3706 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3707 // // 3708 operand eSIRegI(xRegI reg) %{ 3709 constraint(ALLOC_IN_RC(esi_reg)); 3710 match(reg); 3711 match(rRegI); 3712 3713 format %{ "ESI" %} 3714 interface(REG_INTER); 3715 %} 3716 3717 // Pointer Register 3718 operand anyRegP() %{ 3719 constraint(ALLOC_IN_RC(any_reg)); 3720 match(RegP); 3721 match(eAXRegP); 3722 match(eBXRegP); 3723 match(eCXRegP); 3724 match(eDIRegP); 3725 match(eRegP); 3726 3727 format %{ %} 3728 interface(REG_INTER); 3729 %} 3730 3731 operand eRegP() %{ 3732 constraint(ALLOC_IN_RC(int_reg)); 3733 match(RegP); 3734 match(eAXRegP); 3735 match(eBXRegP); 3736 match(eCXRegP); 3737 match(eDIRegP); 3738 3739 format %{ %} 3740 interface(REG_INTER); 3741 %} 3742 3743 operand rRegP() %{ 3744 constraint(ALLOC_IN_RC(int_reg)); 3745 match(RegP); 
3746 match(eAXRegP); 3747 match(eBXRegP); 3748 match(eCXRegP); 3749 match(eDIRegP); 3750 3751 format %{ %} 3752 interface(REG_INTER); 3753 %} 3754 3755 // On windows95, EBP is not safe to use for implicit null tests. 3756 operand eRegP_no_EBP() %{ 3757 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3758 match(RegP); 3759 match(eAXRegP); 3760 match(eBXRegP); 3761 match(eCXRegP); 3762 match(eDIRegP); 3763 3764 op_cost(100); 3765 format %{ %} 3766 interface(REG_INTER); 3767 %} 3768 3769 operand pRegP() %{ 3770 constraint(ALLOC_IN_RC(p_reg)); 3771 match(RegP); 3772 match(eBXRegP); 3773 match(eDXRegP); 3774 match(eSIRegP); 3775 match(eDIRegP); 3776 3777 format %{ %} 3778 interface(REG_INTER); 3779 %} 3780 3781 // Special Registers 3782 // Return a pointer value 3783 operand eAXRegP(eRegP reg) %{ 3784 constraint(ALLOC_IN_RC(eax_reg)); 3785 match(reg); 3786 format %{ "EAX" %} 3787 interface(REG_INTER); 3788 %} 3789 3790 // Used in AtomicAdd 3791 operand eBXRegP(eRegP reg) %{ 3792 constraint(ALLOC_IN_RC(ebx_reg)); 3793 match(reg); 3794 format %{ "EBX" %} 3795 interface(REG_INTER); 3796 %} 3797 3798 // Tail-call (interprocedural jump) to interpreter 3799 operand eCXRegP(eRegP reg) %{ 3800 constraint(ALLOC_IN_RC(ecx_reg)); 3801 match(reg); 3802 format %{ "ECX" %} 3803 interface(REG_INTER); 3804 %} 3805 3806 operand eDXRegP(eRegP reg) %{ 3807 constraint(ALLOC_IN_RC(edx_reg)); 3808 match(reg); 3809 format %{ "EDX" %} 3810 interface(REG_INTER); 3811 %} 3812 3813 operand eSIRegP(eRegP reg) %{ 3814 constraint(ALLOC_IN_RC(esi_reg)); 3815 match(reg); 3816 format %{ "ESI" %} 3817 interface(REG_INTER); 3818 %} 3819 3820 // Used in rep stosw 3821 operand eDIRegP(eRegP reg) %{ 3822 constraint(ALLOC_IN_RC(edi_reg)); 3823 match(reg); 3824 format %{ "EDI" %} 3825 interface(REG_INTER); 3826 %} 3827 3828 operand eRegL() %{ 3829 constraint(ALLOC_IN_RC(long_reg)); 3830 match(RegL); 3831 match(eADXRegL); 3832 3833 format %{ %} 3834 interface(REG_INTER); 3835 %} 3836 3837 operand eADXRegL( eRegL reg 
) %{ 3838 constraint(ALLOC_IN_RC(eadx_reg)); 3839 match(reg); 3840 3841 format %{ "EDX:EAX" %} 3842 interface(REG_INTER); 3843 %} 3844 3845 operand eBCXRegL( eRegL reg ) %{ 3846 constraint(ALLOC_IN_RC(ebcx_reg)); 3847 match(reg); 3848 3849 format %{ "EBX:ECX" %} 3850 interface(REG_INTER); 3851 %} 3852 3853 operand eBDPRegL( eRegL reg ) %{ 3854 constraint(ALLOC_IN_RC(ebpd_reg)); 3855 match(reg); 3856 3857 format %{ "EBP:EDI" %} 3858 interface(REG_INTER); 3859 %} 3860 // Special case for integer high multiply 3861 operand eADXRegL_low_only() %{ 3862 constraint(ALLOC_IN_RC(eadx_reg)); 3863 match(RegL); 3864 3865 format %{ "EAX" %} 3866 interface(REG_INTER); 3867 %} 3868 3869 // Flags register, used as output of compare instructions 3870 operand rFlagsReg() %{ 3871 constraint(ALLOC_IN_RC(int_flags)); 3872 match(RegFlags); 3873 3874 format %{ "EFLAGS" %} 3875 interface(REG_INTER); 3876 %} 3877 3878 // Flags register, used as output of compare instructions 3879 operand eFlagsReg() %{ 3880 constraint(ALLOC_IN_RC(int_flags)); 3881 match(RegFlags); 3882 3883 format %{ "EFLAGS" %} 3884 interface(REG_INTER); 3885 %} 3886 3887 // Flags register, used as output of FLOATING POINT compare instructions 3888 operand eFlagsRegU() %{ 3889 constraint(ALLOC_IN_RC(int_flags)); 3890 match(RegFlags); 3891 3892 format %{ "EFLAGS_U" %} 3893 interface(REG_INTER); 3894 %} 3895 3896 operand eFlagsRegUCF() %{ 3897 constraint(ALLOC_IN_RC(int_flags)); 3898 match(RegFlags); 3899 predicate(false); 3900 3901 format %{ "EFLAGS_U_CF" %} 3902 interface(REG_INTER); 3903 %} 3904 3905 // Condition Code Register used by long compare 3906 operand flagsReg_long_LTGE() %{ 3907 constraint(ALLOC_IN_RC(int_flags)); 3908 match(RegFlags); 3909 format %{ "FLAGS_LTGE" %} 3910 interface(REG_INTER); 3911 %} 3912 operand flagsReg_long_EQNE() %{ 3913 constraint(ALLOC_IN_RC(int_flags)); 3914 match(RegFlags); 3915 format %{ "FLAGS_EQNE" %} 3916 interface(REG_INTER); 3917 %} 3918 operand flagsReg_long_LEGT() %{ 3919 
constraint(ALLOC_IN_RC(int_flags)); 3920 match(RegFlags); 3921 format %{ "FLAGS_LEGT" %} 3922 interface(REG_INTER); 3923 %} 3924 3925 // Condition Code Register used by unsigned long compare 3926 operand flagsReg_ulong_LTGE() %{ 3927 constraint(ALLOC_IN_RC(int_flags)); 3928 match(RegFlags); 3929 format %{ "FLAGS_U_LTGE" %} 3930 interface(REG_INTER); 3931 %} 3932 operand flagsReg_ulong_EQNE() %{ 3933 constraint(ALLOC_IN_RC(int_flags)); 3934 match(RegFlags); 3935 format %{ "FLAGS_U_EQNE" %} 3936 interface(REG_INTER); 3937 %} 3938 operand flagsReg_ulong_LEGT() %{ 3939 constraint(ALLOC_IN_RC(int_flags)); 3940 match(RegFlags); 3941 format %{ "FLAGS_U_LEGT" %} 3942 interface(REG_INTER); 3943 %} 3944 3945 // Float register operands 3946 operand regDPR() %{ 3947 predicate( UseSSE < 2 ); 3948 constraint(ALLOC_IN_RC(fp_dbl_reg)); 3949 match(RegD); 3950 match(regDPR1); 3951 match(regDPR2); 3952 format %{ %} 3953 interface(REG_INTER); 3954 %} 3955 3956 operand regDPR1(regDPR reg) %{ 3957 predicate( UseSSE < 2 ); 3958 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 3959 match(reg); 3960 format %{ "FPR1" %} 3961 interface(REG_INTER); 3962 %} 3963 3964 operand regDPR2(regDPR reg) %{ 3965 predicate( UseSSE < 2 ); 3966 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 3967 match(reg); 3968 format %{ "FPR2" %} 3969 interface(REG_INTER); 3970 %} 3971 3972 operand regnotDPR1(regDPR reg) %{ 3973 predicate( UseSSE < 2 ); 3974 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 3975 match(reg); 3976 format %{ %} 3977 interface(REG_INTER); 3978 %} 3979 3980 // Float register operands 3981 operand regFPR() %{ 3982 predicate( UseSSE < 2 ); 3983 constraint(ALLOC_IN_RC(fp_flt_reg)); 3984 match(RegF); 3985 match(regFPR1); 3986 format %{ %} 3987 interface(REG_INTER); 3988 %} 3989 3990 // Float register operands 3991 operand regFPR1(regFPR reg) %{ 3992 predicate( UseSSE < 2 ); 3993 constraint(ALLOC_IN_RC(fp_flt_reg0)); 3994 match(reg); 3995 format %{ "FPR1" %} 3996 interface(REG_INTER); 3997 %} 3998 3999 // XMM Float register 
operands 4000 operand regF() %{ 4001 predicate( UseSSE>=1 ); 4002 constraint(ALLOC_IN_RC(float_reg_legacy)); 4003 match(RegF); 4004 format %{ %} 4005 interface(REG_INTER); 4006 %} 4007 4008 operand legRegF() %{ 4009 predicate( UseSSE>=1 ); 4010 constraint(ALLOC_IN_RC(float_reg_legacy)); 4011 match(RegF); 4012 format %{ %} 4013 interface(REG_INTER); 4014 %} 4015 4016 // Float register operands 4017 operand vlRegF() %{ 4018 constraint(ALLOC_IN_RC(float_reg_vl)); 4019 match(RegF); 4020 4021 format %{ %} 4022 interface(REG_INTER); 4023 %} 4024 4025 // XMM Double register operands 4026 operand regD() %{ 4027 predicate( UseSSE>=2 ); 4028 constraint(ALLOC_IN_RC(double_reg_legacy)); 4029 match(RegD); 4030 format %{ %} 4031 interface(REG_INTER); 4032 %} 4033 4034 // Double register operands 4035 operand legRegD() %{ 4036 predicate( UseSSE>=2 ); 4037 constraint(ALLOC_IN_RC(double_reg_legacy)); 4038 match(RegD); 4039 format %{ %} 4040 interface(REG_INTER); 4041 %} 4042 4043 operand vlRegD() %{ 4044 constraint(ALLOC_IN_RC(double_reg_vl)); 4045 match(RegD); 4046 4047 format %{ %} 4048 interface(REG_INTER); 4049 %} 4050 4051 //----------Memory Operands---------------------------------------------------- 4052 // Direct Memory Operand 4053 operand direct(immP addr) %{ 4054 match(addr); 4055 4056 format %{ "[$addr]" %} 4057 interface(MEMORY_INTER) %{ 4058 base(0xFFFFFFFF); 4059 index(0x4); 4060 scale(0x0); 4061 disp($addr); 4062 %} 4063 %} 4064 4065 // Indirect Memory Operand 4066 operand indirect(eRegP reg) %{ 4067 constraint(ALLOC_IN_RC(int_reg)); 4068 match(reg); 4069 4070 format %{ "[$reg]" %} 4071 interface(MEMORY_INTER) %{ 4072 base($reg); 4073 index(0x4); 4074 scale(0x0); 4075 disp(0x0); 4076 %} 4077 %} 4078 4079 // Indirect Memory Plus Short Offset Operand 4080 operand indOffset8(eRegP reg, immI8 off) %{ 4081 match(AddP reg off); 4082 4083 format %{ "[$reg + $off]" %} 4084 interface(MEMORY_INTER) %{ 4085 base($reg); 4086 index(0x4); 4087 scale(0x0); 4088 disp($off); 4089 %} 
4090 %} 4091 4092 // Indirect Memory Plus Long Offset Operand 4093 operand indOffset32(eRegP reg, immI off) %{ 4094 match(AddP reg off); 4095 4096 format %{ "[$reg + $off]" %} 4097 interface(MEMORY_INTER) %{ 4098 base($reg); 4099 index(0x4); 4100 scale(0x0); 4101 disp($off); 4102 %} 4103 %} 4104 4105 // Indirect Memory Plus Long Offset Operand 4106 operand indOffset32X(rRegI reg, immP off) %{ 4107 match(AddP off reg); 4108 4109 format %{ "[$reg + $off]" %} 4110 interface(MEMORY_INTER) %{ 4111 base($reg); 4112 index(0x4); 4113 scale(0x0); 4114 disp($off); 4115 %} 4116 %} 4117 4118 // Indirect Memory Plus Index Register Plus Offset Operand 4119 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4120 match(AddP (AddP reg ireg) off); 4121 4122 op_cost(10); 4123 format %{"[$reg + $off + $ireg]" %} 4124 interface(MEMORY_INTER) %{ 4125 base($reg); 4126 index($ireg); 4127 scale(0x0); 4128 disp($off); 4129 %} 4130 %} 4131 4132 // Indirect Memory Plus Index Register Plus Offset Operand 4133 operand indIndex(eRegP reg, rRegI ireg) %{ 4134 match(AddP reg ireg); 4135 4136 op_cost(10); 4137 format %{"[$reg + $ireg]" %} 4138 interface(MEMORY_INTER) %{ 4139 base($reg); 4140 index($ireg); 4141 scale(0x0); 4142 disp(0x0); 4143 %} 4144 %} 4145 4146 // // ------------------------------------------------------------------------- 4147 // // 486 architecture doesn't support "scale * index + offset" with out a base 4148 // // ------------------------------------------------------------------------- 4149 // // Scaled Memory Operands 4150 // // Indirect Memory Times Scale Plus Offset Operand 4151 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4152 // match(AddP off (LShiftI ireg scale)); 4153 // 4154 // op_cost(10); 4155 // format %{"[$off + $ireg << $scale]" %} 4156 // interface(MEMORY_INTER) %{ 4157 // base(0x4); 4158 // index($ireg); 4159 // scale($scale); 4160 // disp($off); 4161 // %} 4162 // %} 4163 4164 // Indirect Memory Times Scale Plus Index Register 4165 
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ 4166 match(AddP reg (LShiftI ireg scale)); 4167 4168 op_cost(10); 4169 format %{"[$reg + $ireg << $scale]" %} 4170 interface(MEMORY_INTER) %{ 4171 base($reg); 4172 index($ireg); 4173 scale($scale); 4174 disp(0x0); 4175 %} 4176 %} 4177 4178 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4179 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ 4180 match(AddP (AddP reg (LShiftI ireg scale)) off); 4181 4182 op_cost(10); 4183 format %{"[$reg + $off + $ireg << $scale]" %} 4184 interface(MEMORY_INTER) %{ 4185 base($reg); 4186 index($ireg); 4187 scale($scale); 4188 disp($off); 4189 %} 4190 %} 4191 4192 //----------Load Long Memory Operands------------------------------------------ 4193 // The load-long idiom will use it's address expression again after loading 4194 // the first word of the long. If the load-long destination overlaps with 4195 // registers used in the addressing expression, the 2nd half will be loaded 4196 // from a clobbered address. Fix this by requiring that load-long use 4197 // address registers that do not overlap with the load-long target. 
4198 4199 // load-long support 4200 operand load_long_RegP() %{ 4201 constraint(ALLOC_IN_RC(esi_reg)); 4202 match(RegP); 4203 match(eSIRegP); 4204 op_cost(100); 4205 format %{ %} 4206 interface(REG_INTER); 4207 %} 4208 4209 // Indirect Memory Operand Long 4210 operand load_long_indirect(load_long_RegP reg) %{ 4211 constraint(ALLOC_IN_RC(esi_reg)); 4212 match(reg); 4213 4214 format %{ "[$reg]" %} 4215 interface(MEMORY_INTER) %{ 4216 base($reg); 4217 index(0x4); 4218 scale(0x0); 4219 disp(0x0); 4220 %} 4221 %} 4222 4223 // Indirect Memory Plus Long Offset Operand 4224 operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 4225 match(AddP reg off); 4226 4227 format %{ "[$reg + $off]" %} 4228 interface(MEMORY_INTER) %{ 4229 base($reg); 4230 index(0x4); 4231 scale(0x0); 4232 disp($off); 4233 %} 4234 %} 4235 4236 opclass load_long_memory(load_long_indirect, load_long_indOffset32); 4237 4238 4239 //----------Special Memory Operands-------------------------------------------- 4240 // Stack Slot Operand - This operand is used for loading and storing temporary 4241 // values on the stack where a match requires a value to 4242 // flow through memory. 
4243 operand stackSlotP(sRegP reg) %{ 4244 constraint(ALLOC_IN_RC(stack_slots)); 4245 // No match rule because this operand is only generated in matching 4246 format %{ "[$reg]" %} 4247 interface(MEMORY_INTER) %{ 4248 base(0x4); // ESP 4249 index(0x4); // No Index 4250 scale(0x0); // No Scale 4251 disp($reg); // Stack Offset 4252 %} 4253 %} 4254 4255 operand stackSlotI(sRegI reg) %{ 4256 constraint(ALLOC_IN_RC(stack_slots)); 4257 // No match rule because this operand is only generated in matching 4258 format %{ "[$reg]" %} 4259 interface(MEMORY_INTER) %{ 4260 base(0x4); // ESP 4261 index(0x4); // No Index 4262 scale(0x0); // No Scale 4263 disp($reg); // Stack Offset 4264 %} 4265 %} 4266 4267 operand stackSlotF(sRegF reg) %{ 4268 constraint(ALLOC_IN_RC(stack_slots)); 4269 // No match rule because this operand is only generated in matching 4270 format %{ "[$reg]" %} 4271 interface(MEMORY_INTER) %{ 4272 base(0x4); // ESP 4273 index(0x4); // No Index 4274 scale(0x0); // No Scale 4275 disp($reg); // Stack Offset 4276 %} 4277 %} 4278 4279 operand stackSlotD(sRegD reg) %{ 4280 constraint(ALLOC_IN_RC(stack_slots)); 4281 // No match rule because this operand is only generated in matching 4282 format %{ "[$reg]" %} 4283 interface(MEMORY_INTER) %{ 4284 base(0x4); // ESP 4285 index(0x4); // No Index 4286 scale(0x0); // No Scale 4287 disp($reg); // Stack Offset 4288 %} 4289 %} 4290 4291 operand stackSlotL(sRegL reg) %{ 4292 constraint(ALLOC_IN_RC(stack_slots)); 4293 // No match rule because this operand is only generated in matching 4294 format %{ "[$reg]" %} 4295 interface(MEMORY_INTER) %{ 4296 base(0x4); // ESP 4297 index(0x4); // No Index 4298 scale(0x0); // No Scale 4299 disp($reg); // Stack Offset 4300 %} 4301 %} 4302 4303 //----------Conditional Branch Operands---------------------------------------- 4304 // Comparison Op - This is the operation of the comparison, and is limited to 4305 // the following set of codes: 4306 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) 
4307 // 4308 // Other attributes of the comparison, such as unsignedness, are specified 4309 // by the comparison instruction that sets a condition code flags register. 4310 // That result is represented by a flags operand whose subtype is appropriate 4311 // to the unsignedness (etc.) of the comparison. 4312 // 4313 // Later, the instruction which matches both the Comparison Op (a Bool) and 4314 // the flags (produced by the Cmp) specifies the coding of the comparison op 4315 // by matching a specific subtype of Bool operand below, such as cmpOpU. 4316 4317 // Comparison Code 4318 operand cmpOp() %{ 4319 match(Bool); 4320 4321 format %{ "" %} 4322 interface(COND_INTER) %{ 4323 equal(0x4, "e"); 4324 not_equal(0x5, "ne"); 4325 less(0xC, "l"); 4326 greater_equal(0xD, "ge"); 4327 less_equal(0xE, "le"); 4328 greater(0xF, "g"); 4329 overflow(0x0, "o"); 4330 no_overflow(0x1, "no"); 4331 %} 4332 %} 4333 4334 // Comparison Code, unsigned compare. Used by FP also, with 4335 // C2 (unordered) turned into GT or LT already. The other bits 4336 // C0 and C3 are turned into Carry & Zero flags. 
4337 operand cmpOpU() %{ 4338 match(Bool); 4339 4340 format %{ "" %} 4341 interface(COND_INTER) %{ 4342 equal(0x4, "e"); 4343 not_equal(0x5, "ne"); 4344 less(0x2, "b"); 4345 greater_equal(0x3, "nb"); 4346 less_equal(0x6, "be"); 4347 greater(0x7, "nbe"); 4348 overflow(0x0, "o"); 4349 no_overflow(0x1, "no"); 4350 %} 4351 %} 4352 4353 // Floating comparisons that don't require any fixup for the unordered case 4354 operand cmpOpUCF() %{ 4355 match(Bool); 4356 predicate(n->as_Bool()->_test._test == BoolTest::lt || 4357 n->as_Bool()->_test._test == BoolTest::ge || 4358 n->as_Bool()->_test._test == BoolTest::le || 4359 n->as_Bool()->_test._test == BoolTest::gt); 4360 format %{ "" %} 4361 interface(COND_INTER) %{ 4362 equal(0x4, "e"); 4363 not_equal(0x5, "ne"); 4364 less(0x2, "b"); 4365 greater_equal(0x3, "nb"); 4366 less_equal(0x6, "be"); 4367 greater(0x7, "nbe"); 4368 overflow(0x0, "o"); 4369 no_overflow(0x1, "no"); 4370 %} 4371 %} 4372 4373 4374 // Floating comparisons that can be fixed up with extra conditional jumps 4375 operand cmpOpUCF2() %{ 4376 match(Bool); 4377 predicate(n->as_Bool()->_test._test == BoolTest::ne || 4378 n->as_Bool()->_test._test == BoolTest::eq); 4379 format %{ "" %} 4380 interface(COND_INTER) %{ 4381 equal(0x4, "e"); 4382 not_equal(0x5, "ne"); 4383 less(0x2, "b"); 4384 greater_equal(0x3, "nb"); 4385 less_equal(0x6, "be"); 4386 greater(0x7, "nbe"); 4387 overflow(0x0, "o"); 4388 no_overflow(0x1, "no"); 4389 %} 4390 %} 4391 4392 // Comparison Code for FP conditional move 4393 operand cmpOp_fcmov() %{ 4394 match(Bool); 4395 4396 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 4397 n->as_Bool()->_test._test != BoolTest::no_overflow); 4398 format %{ "" %} 4399 interface(COND_INTER) %{ 4400 equal (0x0C8); 4401 not_equal (0x1C8); 4402 less (0x0C0); 4403 greater_equal(0x1C0); 4404 less_equal (0x0D0); 4405 greater (0x1D0); 4406 overflow(0x0, "o"); // not really supported by the instruction 4407 no_overflow(0x1, "no"); // not really supported 
by the instruction 4408 %} 4409 %} 4410 4411 // Comparison Code used in long compares 4412 operand cmpOp_commute() %{ 4413 match(Bool); 4414 4415 format %{ "" %} 4416 interface(COND_INTER) %{ 4417 equal(0x4, "e"); 4418 not_equal(0x5, "ne"); 4419 less(0xF, "g"); 4420 greater_equal(0xE, "le"); 4421 less_equal(0xD, "ge"); 4422 greater(0xC, "l"); 4423 overflow(0x0, "o"); 4424 no_overflow(0x1, "no"); 4425 %} 4426 %} 4427 4428 // Comparison Code used in unsigned long compares 4429 operand cmpOpU_commute() %{ 4430 match(Bool); 4431 4432 format %{ "" %} 4433 interface(COND_INTER) %{ 4434 equal(0x4, "e"); 4435 not_equal(0x5, "ne"); 4436 less(0x7, "nbe"); 4437 greater_equal(0x6, "be"); 4438 less_equal(0x3, "nb"); 4439 greater(0x2, "b"); 4440 overflow(0x0, "o"); 4441 no_overflow(0x1, "no"); 4442 %} 4443 %} 4444 4445 //----------OPERAND CLASSES---------------------------------------------------- 4446 // Operand Classes are groups of operands that are used as to simplify 4447 // instruction definitions by not requiring the AD writer to specify separate 4448 // instructions for every form of operand when the instruction accepts 4449 // multiple operand types with the same basic encoding and format. The classic 4450 // case of this is memory operands. 4451 4452 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, 4453 indIndex, indIndexScale, indIndexScaleOffset); 4454 4455 // Long memory operations are encoded in 2 instructions and a +4 offset. 4456 // This means some kind of offset is always required and you cannot use 4457 // an oop as the offset (done when working on static globals). 4458 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset, 4459 indIndex, indIndexScale, indIndexScaleOffset); 4460 4461 4462 //----------PIPELINE----------------------------------------------------------- 4463 // Rules which define the behavior of the target architectures pipeline. 
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable-size instructions (x86 ISA)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
// Or:   _mem if it requires the big decoder and a memory unit.
4507 4508 // Integer ALU reg operation 4509 pipe_class ialu_reg(rRegI dst) %{ 4510 single_instruction; 4511 dst : S4(write); 4512 dst : S3(read); 4513 DECODE : S0; // any decoder 4514 ALU : S3; // any alu 4515 %} 4516 4517 // Long ALU reg operation 4518 pipe_class ialu_reg_long(eRegL dst) %{ 4519 instruction_count(2); 4520 dst : S4(write); 4521 dst : S3(read); 4522 DECODE : S0(2); // any 2 decoders 4523 ALU : S3(2); // both alus 4524 %} 4525 4526 // Integer ALU reg operation using big decoder 4527 pipe_class ialu_reg_fat(rRegI dst) %{ 4528 single_instruction; 4529 dst : S4(write); 4530 dst : S3(read); 4531 D0 : S0; // big decoder only 4532 ALU : S3; // any alu 4533 %} 4534 4535 // Long ALU reg operation using big decoder 4536 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4537 instruction_count(2); 4538 dst : S4(write); 4539 dst : S3(read); 4540 D0 : S0(2); // big decoder only; twice 4541 ALU : S3(2); // any 2 alus 4542 %} 4543 4544 // Integer ALU reg-reg operation 4545 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4546 single_instruction; 4547 dst : S4(write); 4548 src : S3(read); 4549 DECODE : S0; // any decoder 4550 ALU : S3; // any alu 4551 %} 4552 4553 // Long ALU reg-reg operation 4554 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4555 instruction_count(2); 4556 dst : S4(write); 4557 src : S3(read); 4558 DECODE : S0(2); // any 2 decoders 4559 ALU : S3(2); // both alus 4560 %} 4561 4562 // Integer ALU reg-reg operation 4563 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4564 single_instruction; 4565 dst : S4(write); 4566 src : S3(read); 4567 D0 : S0; // big decoder only 4568 ALU : S3; // any alu 4569 %} 4570 4571 // Long ALU reg-reg operation 4572 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4573 instruction_count(2); 4574 dst : S4(write); 4575 src : S3(read); 4576 D0 : S0(2); // big decoder only; twice 4577 ALU : S3(2); // both alus 4578 %} 4579 4580 // Integer ALU reg-mem operation 4581 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4582 single_instruction; 4583 dst : S5(write); 4584 mem : S3(read); 4585 D0 : S0; // big decoder only 4586 ALU : S4; // any alu 4587 MEM : S3; // any mem 4588 %} 4589 4590 // Long ALU reg-mem operation 4591 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4592 instruction_count(2); 4593 dst : S5(write); 4594 mem : S3(read); 4595 D0 : S0(2); // big decoder only; twice 4596 ALU : S4(2); // any 2 alus 4597 MEM : S3(2); // both mems 4598 %} 4599 4600 // Integer mem operation (prefetch) 4601 pipe_class ialu_mem(memory mem) 4602 %{ 4603 single_instruction; 4604 mem : S3(read); 4605 D0 : S0; // big decoder only 4606 MEM : S3; // any mem 4607 %} 4608 4609 // Integer Store to Memory 4610 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4611 single_instruction; 4612 mem : S3(read); 4613 src : S5(read); 4614 D0 : S0; // big decoder only 4615 ALU : S4; // any alu 4616 MEM : S3; 4617 %} 4618 4619 // Long Store to Memory 4620 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4621 instruction_count(2); 4622 mem : S3(read); 4623 src : S5(read); 4624 D0 : S0(2); // big decoder only; twice 4625 ALU : S4(2); // any 2 alus 4626 MEM : S3(2); // Both mems 4627 %} 4628 4629 // Integer Store to Memory 4630 pipe_class ialu_mem_imm(memory mem) %{ 4631 single_instruction; 4632 mem : S3(read); 4633 D0 : S0; // big decoder only 4634 ALU : S4; // any alu 4635 MEM : S3; 4636 %} 4637 4638 // Integer ALU0 reg-reg operation 4639 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4640 single_instruction; 4641 dst : S4(write); 4642 src : S3(read); 4643 D0 : S0; // Big decoder only 4644 ALU0 : S3; // only alu0 4645 %} 4646 4647 // Integer ALU0 reg-mem operation 4648 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4649 single_instruction; 4650 dst : S5(write); 4651 mem : S3(read); 4652 D0 : S0; // big decoder only 4653 ALU0 : S4; // ALU0 only 4654 MEM : S3; // any mem 4655 %} 4656 4657 // Integer ALU reg-reg operation 4658 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4659 single_instruction; 4660 cr : S4(write); 4661 src1 : S3(read); 4662 src2 : S3(read); 4663 DECODE : S0; // any decoder 4664 ALU : S3; // any alu 4665 %} 4666 4667 // Integer ALU reg-imm operation 4668 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4669 single_instruction; 4670 cr : S4(write); 4671 src1 : S3(read); 4672 DECODE : S0; // any decoder 4673 ALU : S3; // any alu 4674 %} 4675 4676 // Integer ALU reg-mem operation 4677 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4678 single_instruction; 4679 cr : S4(write); 4680 src1 : S3(read); 4681 src2 : S3(read); 4682 D0 : S0; // big decoder only 4683 ALU : S4; // any alu 4684 MEM : S3; 4685 %} 4686 4687 // Conditional move reg-reg 4688 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4689 instruction_count(4); 4690 y : S4(read); 4691 q : S3(read); 4692 p : S3(read); 4693 DECODE : S0(4); // any decoder 4694 %} 4695 4696 // Conditional move reg-reg 4697 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4698 single_instruction; 4699 dst : S4(write); 4700 src : S3(read); 4701 cr : S3(read); 4702 DECODE : S0; // any decoder 4703 %} 4704 4705 // Conditional move reg-mem 4706 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4707 single_instruction; 4708 dst : S4(write); 4709 src : S3(read); 4710 cr : S3(read); 4711 DECODE : S0; // any decoder 4712 MEM : S3; 4713 %} 4714 4715 // Conditional move reg-reg long 4716 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4717 single_instruction; 4718 dst : S4(write); 4719 src : S3(read); 4720 cr : S3(read); 4721 DECODE : S0(2); // any 2 decoders 4722 %} 4723 4724 // Conditional move double reg-reg 4725 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4726 single_instruction; 4727 dst : S4(write); 4728 src : S3(read); 4729 cr : S3(read); 4730 DECODE : S0; // any decoder 4731 %} 4732 4733 // Float reg-reg operation 4734 pipe_class fpu_reg(regDPR 
dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);  // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);  // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation (two sources)
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);  // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation (three sources)
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);  // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-mem-reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);  // any 3 decoders
    D0     : S0;     // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;   // big decoder only
    DECODE : S1;   // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;   // any mem
%}

// Float reg-reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;     // big decoder only
    DECODE : S1(2);  // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;     // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2); 4810 src : S5(read); 4811 mem : S3(read); 4812 DECODE : S0; // any decoder for FPU PUSH 4813 D0 : S1; // big decoder only 4814 FPU : S4; 4815 MEM : S3; // any mem 4816 %} 4817 4818 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4819 instruction_count(3); 4820 src1 : S3(read); 4821 src2 : S3(read); 4822 mem : S3(read); 4823 DECODE : S0(2); // any decoder for FPU PUSH 4824 D0 : S1; // big decoder only 4825 FPU : S4; 4826 MEM : S3; // any mem 4827 %} 4828 4829 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4830 instruction_count(3); 4831 src1 : S3(read); 4832 src2 : S3(read); 4833 mem : S4(read); 4834 DECODE : S0; // any decoder for FPU PUSH 4835 D0 : S0(2); // big decoder only 4836 FPU : S4; 4837 MEM : S3(2); // any mem 4838 %} 4839 4840 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4841 instruction_count(2); 4842 src1 : S3(read); 4843 dst : S4(read); 4844 D0 : S0(2); // big decoder only 4845 MEM : S3(2); // any mem 4846 %} 4847 4848 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4849 instruction_count(3); 4850 src1 : S3(read); 4851 src2 : S3(read); 4852 dst : S4(read); 4853 D0 : S0(3); // big decoder only 4854 FPU : S4; 4855 MEM : S3(3); // any mem 4856 %} 4857 4858 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4859 instruction_count(3); 4860 src1 : S4(read); 4861 mem : S4(read); 4862 DECODE : S0; // any decoder for FPU PUSH 4863 D0 : S0(2); // big decoder only 4864 FPU : S4; 4865 MEM : S3(2); // any mem 4866 %} 4867 4868 // Float load constant 4869 pipe_class fpu_reg_con(regDPR dst) %{ 4870 instruction_count(2); 4871 dst : S5(write); 4872 D0 : S0; // big decoder only for the load 4873 DECODE : S1; // any decoder for FPU POP 4874 FPU : S4; 4875 MEM : S3; // any mem 4876 %} 4877 4878 // Float load constant 4879 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 4880 instruction_count(3); 4881 dst : S5(write); 4882 src : S3(read); 4883 D0 : S0; // big decoder only for 
the load 4884 DECODE : S1(2); // any decoder for FPU POP 4885 FPU : S4; 4886 MEM : S3; // any mem 4887 %} 4888 4889 // UnConditional branch 4890 pipe_class pipe_jmp( label labl ) %{ 4891 single_instruction; 4892 BR : S3; 4893 %} 4894 4895 // Conditional branch 4896 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 4897 single_instruction; 4898 cr : S1(read); 4899 BR : S3; 4900 %} 4901 4902 // Allocation idiom 4903 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 4904 instruction_count(1); force_serialization; 4905 fixed_latency(6); 4906 heap_ptr : S3(read); 4907 DECODE : S0(3); 4908 D0 : S2; 4909 MEM : S3; 4910 ALU : S3(2); 4911 dst : S5(write); 4912 BR : S5; 4913 %} 4914 4915 // Generic big/slow expanded idiom 4916 pipe_class pipe_slow( ) %{ 4917 instruction_count(10); multiple_bundles; force_serialization; 4918 fixed_latency(100); 4919 D0 : S0(2); 4920 MEM : S3(2); 4921 %} 4922 4923 // The real do-nothing guy 4924 pipe_class empty( ) %{ 4925 instruction_count(0); 4926 %} 4927 4928 // Define the class for the Nop node 4929 define %{ 4930 MachNop = empty; 4931 %} 4932 4933 %} 4934 4935 //----------INSTRUCTIONS------------------------------------------------------- 4936 // 4937 // match -- States which machine-independent subtree may be replaced 4938 // by this instruction. 4939 // ins_cost -- The estimated cost of this instruction is used by instruction 4940 // selection to identify a minimum cost tree of machine 4941 // instructions that matches a tree of machine-independent 4942 // instructions. 4943 // format -- A string providing the disassembly for this instruction. 4944 // The value of an instruction's operand may be inserted 4945 // by referring to it with a '$' prefix. 4946 // opcode -- Three instruction opcodes may be provided. These are referred 4947 // to within an encode class as $primary, $secondary, and $tertiary 4948 // respectively. 
The primary opcode is commonly used to 4949 // indicate the type of machine instruction, while secondary 4950 // and tertiary are often used for prefix options or addressing 4951 // modes. 4952 // ins_encode -- A list of encode classes with parameters. The encode class 4953 // name must have been defined in an 'enc_class' specification 4954 // in the encode section of the architecture description. 4955 4956 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 4957 // Load Float 4958 instruct MoveF2LEG(legRegF dst, regF src) %{ 4959 match(Set dst src); 4960 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} 4961 ins_encode %{ 4962 ShouldNotReachHere(); 4963 %} 4964 ins_pipe( fpu_reg_reg ); 4965 %} 4966 4967 // Load Float 4968 instruct MoveLEG2F(regF dst, legRegF src) %{ 4969 match(Set dst src); 4970 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} 4971 ins_encode %{ 4972 ShouldNotReachHere(); 4973 %} 4974 ins_pipe( fpu_reg_reg ); 4975 %} 4976 4977 // Load Float 4978 instruct MoveF2VL(vlRegF dst, regF src) %{ 4979 match(Set dst src); 4980 format %{ "movss $dst,$src\t! load float (4 bytes)" %} 4981 ins_encode %{ 4982 ShouldNotReachHere(); 4983 %} 4984 ins_pipe( fpu_reg_reg ); 4985 %} 4986 4987 // Load Float 4988 instruct MoveVL2F(regF dst, vlRegF src) %{ 4989 match(Set dst src); 4990 format %{ "movss $dst,$src\t! 
load float (4 bytes)" %} 4991 ins_encode %{ 4992 ShouldNotReachHere(); 4993 %} 4994 ins_pipe( fpu_reg_reg ); 4995 %} 4996 4997 4998 4999 // Load Double 5000 instruct MoveD2LEG(legRegD dst, regD src) %{ 5001 match(Set dst src); 5002 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} 5003 ins_encode %{ 5004 ShouldNotReachHere(); 5005 %} 5006 ins_pipe( fpu_reg_reg ); 5007 %} 5008 5009 // Load Double 5010 instruct MoveLEG2D(regD dst, legRegD src) %{ 5011 match(Set dst src); 5012 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} 5013 ins_encode %{ 5014 ShouldNotReachHere(); 5015 %} 5016 ins_pipe( fpu_reg_reg ); 5017 %} 5018 5019 // Load Double 5020 instruct MoveD2VL(vlRegD dst, regD src) %{ 5021 match(Set dst src); 5022 format %{ "movsd $dst,$src\t! load double (8 bytes)" %} 5023 ins_encode %{ 5024 ShouldNotReachHere(); 5025 %} 5026 ins_pipe( fpu_reg_reg ); 5027 %} 5028 5029 // Load Double 5030 instruct MoveVL2D(regD dst, vlRegD src) %{ 5031 match(Set dst src); 5032 format %{ "movsd $dst,$src\t! 
load double (8 bytes)" %} 5033 ins_encode %{ 5034 ShouldNotReachHere(); 5035 %} 5036 ins_pipe( fpu_reg_reg ); 5037 %} 5038 5039 //----------BSWAP-Instruction-------------------------------------------------- 5040 instruct bytes_reverse_int(rRegI dst) %{ 5041 match(Set dst (ReverseBytesI dst)); 5042 5043 format %{ "BSWAP $dst" %} 5044 opcode(0x0F, 0xC8); 5045 ins_encode( OpcP, OpcSReg(dst) ); 5046 ins_pipe( ialu_reg ); 5047 %} 5048 5049 instruct bytes_reverse_long(eRegL dst) %{ 5050 match(Set dst (ReverseBytesL dst)); 5051 5052 format %{ "BSWAP $dst.lo\n\t" 5053 "BSWAP $dst.hi\n\t" 5054 "XCHG $dst.lo $dst.hi" %} 5055 5056 ins_cost(125); 5057 ins_encode( bswap_long_bytes(dst) ); 5058 ins_pipe( ialu_reg_reg); 5059 %} 5060 5061 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5062 match(Set dst (ReverseBytesUS dst)); 5063 effect(KILL cr); 5064 5065 format %{ "BSWAP $dst\n\t" 5066 "SHR $dst,16\n\t" %} 5067 ins_encode %{ 5068 __ bswapl($dst$$Register); 5069 __ shrl($dst$$Register, 16); 5070 %} 5071 ins_pipe( ialu_reg ); 5072 %} 5073 5074 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5075 match(Set dst (ReverseBytesS dst)); 5076 effect(KILL cr); 5077 5078 format %{ "BSWAP $dst\n\t" 5079 "SAR $dst,16\n\t" %} 5080 ins_encode %{ 5081 __ bswapl($dst$$Register); 5082 __ sarl($dst$$Register, 16); 5083 %} 5084 ins_pipe( ialu_reg ); 5085 %} 5086 5087 5088 //---------- Zeros Count Instructions ------------------------------------------ 5089 5090 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5091 predicate(UseCountLeadingZerosInstruction); 5092 match(Set dst (CountLeadingZerosI src)); 5093 effect(KILL cr); 5094 5095 format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %} 5096 ins_encode %{ 5097 __ lzcntl($dst$$Register, $src$$Register); 5098 %} 5099 ins_pipe(ialu_reg); 5100 %} 5101 5102 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5103 predicate(!UseCountLeadingZerosInstruction); 5104 match(Set dst 
(CountLeadingZerosI src)); 5105 effect(KILL cr); 5106 5107 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5108 "JNZ skip\n\t" 5109 "MOV $dst, -1\n" 5110 "skip:\n\t" 5111 "NEG $dst\n\t" 5112 "ADD $dst, 31" %} 5113 ins_encode %{ 5114 Register Rdst = $dst$$Register; 5115 Register Rsrc = $src$$Register; 5116 Label skip; 5117 __ bsrl(Rdst, Rsrc); 5118 __ jccb(Assembler::notZero, skip); 5119 __ movl(Rdst, -1); 5120 __ bind(skip); 5121 __ negl(Rdst); 5122 __ addl(Rdst, BitsPerInt - 1); 5123 %} 5124 ins_pipe(ialu_reg); 5125 %} 5126 5127 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5128 predicate(UseCountLeadingZerosInstruction); 5129 match(Set dst (CountLeadingZerosL src)); 5130 effect(TEMP dst, KILL cr); 5131 5132 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5133 "JNC done\n\t" 5134 "LZCNT $dst, $src.lo\n\t" 5135 "ADD $dst, 32\n" 5136 "done:" %} 5137 ins_encode %{ 5138 Register Rdst = $dst$$Register; 5139 Register Rsrc = $src$$Register; 5140 Label done; 5141 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5142 __ jccb(Assembler::carryClear, done); 5143 __ lzcntl(Rdst, Rsrc); 5144 __ addl(Rdst, BitsPerInt); 5145 __ bind(done); 5146 %} 5147 ins_pipe(ialu_reg); 5148 %} 5149 5150 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5151 predicate(!UseCountLeadingZerosInstruction); 5152 match(Set dst (CountLeadingZerosL src)); 5153 effect(TEMP dst, KILL cr); 5154 5155 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5156 "JZ msw_is_zero\n\t" 5157 "ADD $dst, 32\n\t" 5158 "JMP not_zero\n" 5159 "msw_is_zero:\n\t" 5160 "BSR $dst, $src.lo\n\t" 5161 "JNZ not_zero\n\t" 5162 "MOV $dst, -1\n" 5163 "not_zero:\n\t" 5164 "NEG $dst\n\t" 5165 "ADD $dst, 63\n" %} 5166 ins_encode %{ 5167 Register Rdst = $dst$$Register; 5168 Register Rsrc = $src$$Register; 5169 Label msw_is_zero; 5170 Label not_zero; 5171 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5172 __ jccb(Assembler::zero, msw_is_zero); 5173 __ addl(Rdst, BitsPerInt); 
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);
    __ bind(not_zero);
    // At this point Rdst holds the combined bit index in [0, 63]
    // (or -1 when the whole 64-bit source was zero).  Convert the
    // index into a leading-zero count: 63 - index == -index + 63,
    // and the -1 sentinel maps to 64.
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when TZCNT is unavailable.  BSF leaves the destination
// undefined for a zero source, so a zero input is patched afterwards to
// the expected result 32.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// TZCNT sets CF when its (32-bit) source is all zero; the carry test
// decides whether the low word produced the final count or the high
// word must be scanned and offset by 32.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
predicate(!UseCountTrailingZerosInstruction); 5243 match(Set dst (CountTrailingZerosL src)); 5244 effect(TEMP dst, KILL cr); 5245 5246 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5247 "JNZ done\n\t" 5248 "BSF $dst, $src.hi\n\t" 5249 "JNZ msw_not_zero\n\t" 5250 "MOV $dst, 32\n" 5251 "msw_not_zero:\n\t" 5252 "ADD $dst, 32\n" 5253 "done:" %} 5254 ins_encode %{ 5255 Register Rdst = $dst$$Register; 5256 Register Rsrc = $src$$Register; 5257 Label msw_not_zero; 5258 Label done; 5259 __ bsfl(Rdst, Rsrc); 5260 __ jccb(Assembler::notZero, done); 5261 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5262 __ jccb(Assembler::notZero, msw_not_zero); 5263 __ movl(Rdst, BitsPerInt); 5264 __ bind(msw_not_zero); 5265 __ addl(Rdst, BitsPerInt); 5266 __ bind(done); 5267 %} 5268 ins_pipe(ialu_reg); 5269 %} 5270 5271 5272 //---------- Population Count Instructions ------------------------------------- 5273 5274 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5275 predicate(UsePopCountInstruction); 5276 match(Set dst (PopCountI src)); 5277 effect(KILL cr); 5278 5279 format %{ "POPCNT $dst, $src" %} 5280 ins_encode %{ 5281 __ popcntl($dst$$Register, $src$$Register); 5282 %} 5283 ins_pipe(ialu_reg); 5284 %} 5285 5286 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5287 predicate(UsePopCountInstruction); 5288 match(Set dst (PopCountI (LoadI mem))); 5289 effect(KILL cr); 5290 5291 format %{ "POPCNT $dst, $mem" %} 5292 ins_encode %{ 5293 __ popcntl($dst$$Register, $mem$$Address); 5294 %} 5295 ins_pipe(ialu_reg); 5296 %} 5297 5298 // Note: Long.bitCount(long) returns an int. 
5299 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 5300 predicate(UsePopCountInstruction); 5301 match(Set dst (PopCountL src)); 5302 effect(KILL cr, TEMP tmp, TEMP dst); 5303 5304 format %{ "POPCNT $dst, $src.lo\n\t" 5305 "POPCNT $tmp, $src.hi\n\t" 5306 "ADD $dst, $tmp" %} 5307 ins_encode %{ 5308 __ popcntl($dst$$Register, $src$$Register); 5309 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 5310 __ addl($dst$$Register, $tmp$$Register); 5311 %} 5312 ins_pipe(ialu_reg); 5313 %} 5314 5315 // Note: Long.bitCount(long) returns an int. 5316 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ 5317 predicate(UsePopCountInstruction); 5318 match(Set dst (PopCountL (LoadL mem))); 5319 effect(KILL cr, TEMP tmp, TEMP dst); 5320 5321 format %{ "POPCNT $dst, $mem\n\t" 5322 "POPCNT $tmp, $mem+4\n\t" 5323 "ADD $dst, $tmp" %} 5324 ins_encode %{ 5325 //__ popcntl($dst$$Register, $mem$$Address$$first); 5326 //__ popcntl($tmp$$Register, $mem$$Address$$second); 5327 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); 5328 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); 5329 __ addl($dst$$Register, $tmp$$Register); 5330 %} 5331 ins_pipe(ialu_reg); 5332 %} 5333 5334 5335 //----------Load/Store/Move Instructions--------------------------------------- 5336 //----------Load Instructions-------------------------------------------------- 5337 // Load Byte (8bit signed) 5338 instruct loadB(xRegI dst, memory mem) %{ 5339 match(Set dst (LoadB mem)); 5340 5341 ins_cost(125); 5342 format %{ "MOVSX8 $dst,$mem\t# byte" %} 5343 5344 ins_encode %{ 5345 __ movsbl($dst$$Register, $mem$$Address); 5346 %} 5347 5348 ins_pipe(ialu_reg_mem); 5349 %} 5350 5351 // Load Byte (8bit signed) into Long Register 5352 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5353 match(Set dst (ConvI2L (LoadB mem))); 5354 effect(KILL 
cr); 5355 5356 ins_cost(375); 5357 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" 5358 "MOV $dst.hi,$dst.lo\n\t" 5359 "SAR $dst.hi,7" %} 5360 5361 ins_encode %{ 5362 __ movsbl($dst$$Register, $mem$$Address); 5363 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5364 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. 5365 %} 5366 5367 ins_pipe(ialu_reg_mem); 5368 %} 5369 5370 // Load Unsigned Byte (8bit UNsigned) 5371 instruct loadUB(xRegI dst, memory mem) %{ 5372 match(Set dst (LoadUB mem)); 5373 5374 ins_cost(125); 5375 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} 5376 5377 ins_encode %{ 5378 __ movzbl($dst$$Register, $mem$$Address); 5379 %} 5380 5381 ins_pipe(ialu_reg_mem); 5382 %} 5383 5384 // Load Unsigned Byte (8 bit UNsigned) into Long Register 5385 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5386 match(Set dst (ConvI2L (LoadUB mem))); 5387 effect(KILL cr); 5388 5389 ins_cost(250); 5390 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" 5391 "XOR $dst.hi,$dst.hi" %} 5392 5393 ins_encode %{ 5394 Register Rdst = $dst$$Register; 5395 __ movzbl(Rdst, $mem$$Address); 5396 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5397 %} 5398 5399 ins_pipe(ialu_reg_mem); 5400 %} 5401 5402 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register 5403 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5404 match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); 5405 effect(KILL cr); 5406 5407 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" 5408 "XOR $dst.hi,$dst.hi\n\t" 5409 "AND $dst.lo,right_n_bits($mask, 8)" %} 5410 ins_encode %{ 5411 Register Rdst = $dst$$Register; 5412 __ movzbl(Rdst, $mem$$Address); 5413 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5414 __ andl(Rdst, $mask$$constant & right_n_bits(8)); 5415 %} 5416 ins_pipe(ialu_reg_mem); 5417 %} 5418 5419 // Load Short (16bit signed) 5420 instruct loadS(rRegI 
dst, memory mem) %{ 5421 match(Set dst (LoadS mem)); 5422 5423 ins_cost(125); 5424 format %{ "MOVSX $dst,$mem\t# short" %} 5425 5426 ins_encode %{ 5427 __ movswl($dst$$Register, $mem$$Address); 5428 %} 5429 5430 ins_pipe(ialu_reg_mem); 5431 %} 5432 5433 // Load Short (16 bit signed) to Byte (8 bit signed) 5434 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5435 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5436 5437 ins_cost(125); 5438 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5439 ins_encode %{ 5440 __ movsbl($dst$$Register, $mem$$Address); 5441 %} 5442 ins_pipe(ialu_reg_mem); 5443 %} 5444 5445 // Load Short (16bit signed) into Long Register 5446 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5447 match(Set dst (ConvI2L (LoadS mem))); 5448 effect(KILL cr); 5449 5450 ins_cost(375); 5451 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5452 "MOV $dst.hi,$dst.lo\n\t" 5453 "SAR $dst.hi,15" %} 5454 5455 ins_encode %{ 5456 __ movswl($dst$$Register, $mem$$Address); 5457 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5458 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 
5459 %} 5460 5461 ins_pipe(ialu_reg_mem); 5462 %} 5463 5464 // Load Unsigned Short/Char (16bit unsigned) 5465 instruct loadUS(rRegI dst, memory mem) %{ 5466 match(Set dst (LoadUS mem)); 5467 5468 ins_cost(125); 5469 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5470 5471 ins_encode %{ 5472 __ movzwl($dst$$Register, $mem$$Address); 5473 %} 5474 5475 ins_pipe(ialu_reg_mem); 5476 %} 5477 5478 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5479 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5480 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5481 5482 ins_cost(125); 5483 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5484 ins_encode %{ 5485 __ movsbl($dst$$Register, $mem$$Address); 5486 %} 5487 ins_pipe(ialu_reg_mem); 5488 %} 5489 5490 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5491 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5492 match(Set dst (ConvI2L (LoadUS mem))); 5493 effect(KILL cr); 5494 5495 ins_cost(250); 5496 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5497 "XOR $dst.hi,$dst.hi" %} 5498 5499 ins_encode %{ 5500 __ movzwl($dst$$Register, $mem$$Address); 5501 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5502 %} 5503 5504 ins_pipe(ialu_reg_mem); 5505 %} 5506 5507 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5508 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5509 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5510 effect(KILL cr); 5511 5512 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5513 "XOR $dst.hi,$dst.hi" %} 5514 ins_encode %{ 5515 Register Rdst = $dst$$Register; 5516 __ movzbl(Rdst, $mem$$Address); 5517 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5518 %} 5519 ins_pipe(ialu_reg_mem); 5520 %} 5521 5522 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5523 instruct 
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5524 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5525 effect(KILL cr); 5526 5527 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5528 "XOR $dst.hi,$dst.hi\n\t" 5529 "AND $dst.lo,right_n_bits($mask, 16)" %} 5530 ins_encode %{ 5531 Register Rdst = $dst$$Register; 5532 __ movzwl(Rdst, $mem$$Address); 5533 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5534 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5535 %} 5536 ins_pipe(ialu_reg_mem); 5537 %} 5538 5539 // Load Integer 5540 instruct loadI(rRegI dst, memory mem) %{ 5541 match(Set dst (LoadI mem)); 5542 5543 ins_cost(125); 5544 format %{ "MOV $dst,$mem\t# int" %} 5545 5546 ins_encode %{ 5547 __ movl($dst$$Register, $mem$$Address); 5548 %} 5549 5550 ins_pipe(ialu_reg_mem); 5551 %} 5552 5553 // Load Integer (32 bit signed) to Byte (8 bit signed) 5554 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5555 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5556 5557 ins_cost(125); 5558 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5559 ins_encode %{ 5560 __ movsbl($dst$$Register, $mem$$Address); 5561 %} 5562 ins_pipe(ialu_reg_mem); 5563 %} 5564 5565 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5566 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5567 match(Set dst (AndI (LoadI mem) mask)); 5568 5569 ins_cost(125); 5570 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5571 ins_encode %{ 5572 __ movzbl($dst$$Register, $mem$$Address); 5573 %} 5574 ins_pipe(ialu_reg_mem); 5575 %} 5576 5577 // Load Integer (32 bit signed) to Short (16 bit signed) 5578 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5579 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5580 5581 ins_cost(125); 5582 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5583 ins_encode %{ 5584 __ movswl($dst$$Register, $mem$$Address); 5585 %} 5586 ins_pipe(ialu_reg_mem); 5587 
%} 5588 5589 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5590 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5591 match(Set dst (AndI (LoadI mem) mask)); 5592 5593 ins_cost(125); 5594 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5595 ins_encode %{ 5596 __ movzwl($dst$$Register, $mem$$Address); 5597 %} 5598 ins_pipe(ialu_reg_mem); 5599 %} 5600 5601 // Load Integer into Long Register 5602 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5603 match(Set dst (ConvI2L (LoadI mem))); 5604 effect(KILL cr); 5605 5606 ins_cost(375); 5607 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5608 "MOV $dst.hi,$dst.lo\n\t" 5609 "SAR $dst.hi,31" %} 5610 5611 ins_encode %{ 5612 __ movl($dst$$Register, $mem$$Address); 5613 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5614 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5615 %} 5616 5617 ins_pipe(ialu_reg_mem); 5618 %} 5619 5620 // Load Integer with mask 0xFF into Long Register 5621 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5622 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5623 effect(KILL cr); 5624 5625 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5626 "XOR $dst.hi,$dst.hi" %} 5627 ins_encode %{ 5628 Register Rdst = $dst$$Register; 5629 __ movzbl(Rdst, $mem$$Address); 5630 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5631 %} 5632 ins_pipe(ialu_reg_mem); 5633 %} 5634 5635 // Load Integer with mask 0xFFFF into Long Register 5636 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5637 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5638 effect(KILL cr); 5639 5640 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5641 "XOR $dst.hi,$dst.hi" %} 5642 ins_encode %{ 5643 Register Rdst = $dst$$Register; 5644 __ movzwl(Rdst, $mem$$Address); 5645 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5646 %} 5647 ins_pipe(ialu_reg_mem); 
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    // The mask fits in 31 bits, so the masked value is non-negative and
    // the high word of the long result is simply zero.
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  // Non-atomic case only; the volatile (atomic) variants follow below.
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two separate 32-bit loads: low word at $mem, high word at $mem+4.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
5707 instruct loadL_volatile(stackSlotL dst, memory mem) %{ 5708 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); 5709 match(Set dst (LoadL mem)); 5710 5711 ins_cost(200); 5712 format %{ "FILD $mem\t# Atomic volatile long load\n\t" 5713 "FISTp $dst" %} 5714 ins_encode(enc_loadL_volatile(mem,dst)); 5715 ins_pipe( fpu_reg_mem ); 5716 %} 5717 5718 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ 5719 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5720 match(Set dst (LoadL mem)); 5721 effect(TEMP tmp); 5722 ins_cost(180); 5723 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5724 "MOVSD $dst,$tmp" %} 5725 ins_encode %{ 5726 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5727 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 5728 %} 5729 ins_pipe( pipe_slow ); 5730 %} 5731 5732 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ 5733 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5734 match(Set dst (LoadL mem)); 5735 effect(TEMP tmp); 5736 ins_cost(160); 5737 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5738 "MOVD $dst.lo,$tmp\n\t" 5739 "PSRLQ $tmp,32\n\t" 5740 "MOVD $dst.hi,$tmp" %} 5741 ins_encode %{ 5742 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5743 __ movdl($dst$$Register, $tmp$$XMMRegister); 5744 __ psrlq($tmp$$XMMRegister, 32); 5745 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 5746 %} 5747 ins_pipe( pipe_slow ); 5748 %} 5749 5750 // Load Range 5751 instruct loadRange(rRegI dst, memory mem) %{ 5752 match(Set dst (LoadRange mem)); 5753 5754 ins_cost(125); 5755 format %{ "MOV $dst,$mem" %} 5756 opcode(0x8B); 5757 ins_encode( OpcP, RegMem(dst,mem)); 5758 ins_pipe( ialu_reg_mem ); 5759 %} 5760 5761 5762 // Load Pointer 5763 instruct loadP(eRegP dst, memory mem) %{ 5764 match(Set dst (LoadP mem)); 5765 5766 ins_cost(125); 5767 format %{ "MOV $dst,$mem" %} 5768 opcode(0x8B); 5769 ins_encode( OpcP, RegMem(dst,mem)); 5770 ins_pipe( 
ialu_reg_mem ); 5771 %} 5772 5773 // Load Klass Pointer 5774 instruct loadKlass(eRegP dst, memory mem) %{ 5775 match(Set dst (LoadKlass mem)); 5776 5777 ins_cost(125); 5778 format %{ "MOV $dst,$mem" %} 5779 opcode(0x8B); 5780 ins_encode( OpcP, RegMem(dst,mem)); 5781 ins_pipe( ialu_reg_mem ); 5782 %} 5783 5784 // Load Double 5785 instruct loadDPR(regDPR dst, memory mem) %{ 5786 predicate(UseSSE<=1); 5787 match(Set dst (LoadD mem)); 5788 5789 ins_cost(150); 5790 format %{ "FLD_D ST,$mem\n\t" 5791 "FSTP $dst" %} 5792 opcode(0xDD); /* DD /0 */ 5793 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5794 Pop_Reg_DPR(dst) ); 5795 ins_pipe( fpu_reg_mem ); 5796 %} 5797 5798 // Load Double to XMM 5799 instruct loadD(regD dst, memory mem) %{ 5800 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 5801 match(Set dst (LoadD mem)); 5802 ins_cost(145); 5803 format %{ "MOVSD $dst,$mem" %} 5804 ins_encode %{ 5805 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5806 %} 5807 ins_pipe( pipe_slow ); 5808 %} 5809 5810 instruct loadD_partial(regD dst, memory mem) %{ 5811 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 5812 match(Set dst (LoadD mem)); 5813 ins_cost(145); 5814 format %{ "MOVLPD $dst,$mem" %} 5815 ins_encode %{ 5816 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5817 %} 5818 ins_pipe( pipe_slow ); 5819 %} 5820 5821 // Load to XMM register (single-precision floating point) 5822 // MOVSS instruction 5823 instruct loadF(regF dst, memory mem) %{ 5824 predicate(UseSSE>=1); 5825 match(Set dst (LoadF mem)); 5826 ins_cost(145); 5827 format %{ "MOVSS $dst,$mem" %} 5828 ins_encode %{ 5829 __ movflt ($dst$$XMMRegister, $mem$$Address); 5830 %} 5831 ins_pipe( pipe_slow ); 5832 %} 5833 5834 // Load Float 5835 instruct loadFPR(regFPR dst, memory mem) %{ 5836 predicate(UseSSE==0); 5837 match(Set dst (LoadF mem)); 5838 5839 ins_cost(150); 5840 format %{ "FLD_S ST,$mem\n\t" 5841 "FSTP $dst" %} 5842 opcode(0xD9); /* D9 /0 */ 5843 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5844 Pop_Reg_FPR(dst) ); 5845 
ins_pipe( fpu_reg_mem ); 5846 %} 5847 5848 // Load Effective Address 5849 instruct leaP8(eRegP dst, indOffset8 mem) %{ 5850 match(Set dst mem); 5851 5852 ins_cost(110); 5853 format %{ "LEA $dst,$mem" %} 5854 opcode(0x8D); 5855 ins_encode( OpcP, RegMem(dst,mem)); 5856 ins_pipe( ialu_reg_reg_fat ); 5857 %} 5858 5859 instruct leaP32(eRegP dst, indOffset32 mem) %{ 5860 match(Set dst mem); 5861 5862 ins_cost(110); 5863 format %{ "LEA $dst,$mem" %} 5864 opcode(0x8D); 5865 ins_encode( OpcP, RegMem(dst,mem)); 5866 ins_pipe( ialu_reg_reg_fat ); 5867 %} 5868 5869 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{ 5870 match(Set dst mem); 5871 5872 ins_cost(110); 5873 format %{ "LEA $dst,$mem" %} 5874 opcode(0x8D); 5875 ins_encode( OpcP, RegMem(dst,mem)); 5876 ins_pipe( ialu_reg_reg_fat ); 5877 %} 5878 5879 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{ 5880 match(Set dst mem); 5881 5882 ins_cost(110); 5883 format %{ "LEA $dst,$mem" %} 5884 opcode(0x8D); 5885 ins_encode( OpcP, RegMem(dst,mem)); 5886 ins_pipe( ialu_reg_reg_fat ); 5887 %} 5888 5889 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{ 5890 match(Set dst mem); 5891 5892 ins_cost(110); 5893 format %{ "LEA $dst,$mem" %} 5894 opcode(0x8D); 5895 ins_encode( OpcP, RegMem(dst,mem)); 5896 ins_pipe( ialu_reg_reg_fat ); 5897 %} 5898 5899 // Load Constant 5900 instruct loadConI(rRegI dst, immI src) %{ 5901 match(Set dst src); 5902 5903 format %{ "MOV $dst,$src" %} 5904 ins_encode( LdImmI(dst, src) ); 5905 ins_pipe( ialu_reg_fat ); 5906 %} 5907 5908 // Load Constant zero 5909 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{ 5910 match(Set dst src); 5911 effect(KILL cr); 5912 5913 ins_cost(50); 5914 format %{ "XOR $dst,$dst" %} 5915 opcode(0x33); /* + rd */ 5916 ins_encode( OpcP, RegReg( dst, dst ) ); 5917 ins_pipe( ialu_reg ); 5918 %} 5919 5920 instruct loadConP(eRegP dst, immP src) %{ 5921 match(Set dst src); 5922 5923 format %{ "MOV $dst,$src" %} 5924 opcode(0xB8); /* + rd */ 5925 
ins_encode( LdImmP(dst, src) ); 5926 ins_pipe( ialu_reg_fat ); 5927 %} 5928 5929 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{ 5930 match(Set dst src); 5931 effect(KILL cr); 5932 ins_cost(200); 5933 format %{ "MOV $dst.lo,$src.lo\n\t" 5934 "MOV $dst.hi,$src.hi" %} 5935 opcode(0xB8); 5936 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); 5937 ins_pipe( ialu_reg_long_fat ); 5938 %} 5939 5940 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{ 5941 match(Set dst src); 5942 effect(KILL cr); 5943 ins_cost(150); 5944 format %{ "XOR $dst.lo,$dst.lo\n\t" 5945 "XOR $dst.hi,$dst.hi" %} 5946 opcode(0x33,0x33); 5947 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); 5948 ins_pipe( ialu_reg_long ); 5949 %} 5950 5951 // The instruction usage is guarded by predicate in operand immFPR(). 5952 instruct loadConFPR(regFPR dst, immFPR con) %{ 5953 match(Set dst con); 5954 ins_cost(125); 5955 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 5956 "FSTP $dst" %} 5957 ins_encode %{ 5958 __ fld_s($constantaddress($con)); 5959 __ fstp_d($dst$$reg); 5960 %} 5961 ins_pipe(fpu_reg_con); 5962 %} 5963 5964 // The instruction usage is guarded by predicate in operand immFPR0(). 5965 instruct loadConFPR0(regFPR dst, immFPR0 con) %{ 5966 match(Set dst con); 5967 ins_cost(125); 5968 format %{ "FLDZ ST\n\t" 5969 "FSTP $dst" %} 5970 ins_encode %{ 5971 __ fldz(); 5972 __ fstp_d($dst$$reg); 5973 %} 5974 ins_pipe(fpu_reg_con); 5975 %} 5976 5977 // The instruction usage is guarded by predicate in operand immFPR1(). 5978 instruct loadConFPR1(regFPR dst, immFPR1 con) %{ 5979 match(Set dst con); 5980 ins_cost(125); 5981 format %{ "FLD1 ST\n\t" 5982 "FSTP $dst" %} 5983 ins_encode %{ 5984 __ fld1(); 5985 __ fstp_d($dst$$reg); 5986 %} 5987 ins_pipe(fpu_reg_con); 5988 %} 5989 5990 // The instruction usage is guarded by predicate in operand immF(). 
5991 instruct loadConF(regF dst, immF con) %{ 5992 match(Set dst con); 5993 ins_cost(125); 5994 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 5995 ins_encode %{ 5996 __ movflt($dst$$XMMRegister, $constantaddress($con)); 5997 %} 5998 ins_pipe(pipe_slow); 5999 %} 6000 6001 // The instruction usage is guarded by predicate in operand immF0(). 6002 instruct loadConF0(regF dst, immF0 src) %{ 6003 match(Set dst src); 6004 ins_cost(100); 6005 format %{ "XORPS $dst,$dst\t# float 0.0" %} 6006 ins_encode %{ 6007 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 6008 %} 6009 ins_pipe(pipe_slow); 6010 %} 6011 6012 // The instruction usage is guarded by predicate in operand immDPR(). 6013 instruct loadConDPR(regDPR dst, immDPR con) %{ 6014 match(Set dst con); 6015 ins_cost(125); 6016 6017 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 6018 "FSTP $dst" %} 6019 ins_encode %{ 6020 __ fld_d($constantaddress($con)); 6021 __ fstp_d($dst$$reg); 6022 %} 6023 ins_pipe(fpu_reg_con); 6024 %} 6025 6026 // The instruction usage is guarded by predicate in operand immDPR0(). 6027 instruct loadConDPR0(regDPR dst, immDPR0 con) %{ 6028 match(Set dst con); 6029 ins_cost(125); 6030 6031 format %{ "FLDZ ST\n\t" 6032 "FSTP $dst" %} 6033 ins_encode %{ 6034 __ fldz(); 6035 __ fstp_d($dst$$reg); 6036 %} 6037 ins_pipe(fpu_reg_con); 6038 %} 6039 6040 // The instruction usage is guarded by predicate in operand immDPR1(). 6041 instruct loadConDPR1(regDPR dst, immDPR1 con) %{ 6042 match(Set dst con); 6043 ins_cost(125); 6044 6045 format %{ "FLD1 ST\n\t" 6046 "FSTP $dst" %} 6047 ins_encode %{ 6048 __ fld1(); 6049 __ fstp_d($dst$$reg); 6050 %} 6051 ins_pipe(fpu_reg_con); 6052 %} 6053 6054 // The instruction usage is guarded by predicate in operand immD(). 
6055 instruct loadConD(regD dst, immD con) %{ 6056 match(Set dst con); 6057 ins_cost(125); 6058 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 6059 ins_encode %{ 6060 __ movdbl($dst$$XMMRegister, $constantaddress($con)); 6061 %} 6062 ins_pipe(pipe_slow); 6063 %} 6064 6065 // The instruction usage is guarded by predicate in operand immD0(). 6066 instruct loadConD0(regD dst, immD0 src) %{ 6067 match(Set dst src); 6068 ins_cost(100); 6069 format %{ "XORPD $dst,$dst\t# double 0.0" %} 6070 ins_encode %{ 6071 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); 6072 %} 6073 ins_pipe( pipe_slow ); 6074 %} 6075 6076 // Load Stack Slot 6077 instruct loadSSI(rRegI dst, stackSlotI src) %{ 6078 match(Set dst src); 6079 ins_cost(125); 6080 6081 format %{ "MOV $dst,$src" %} 6082 opcode(0x8B); 6083 ins_encode( OpcP, RegMem(dst,src)); 6084 ins_pipe( ialu_reg_mem ); 6085 %} 6086 6087 instruct loadSSL(eRegL dst, stackSlotL src) %{ 6088 match(Set dst src); 6089 6090 ins_cost(200); 6091 format %{ "MOV $dst,$src.lo\n\t" 6092 "MOV $dst+4,$src.hi" %} 6093 opcode(0x8B, 0x8B); 6094 ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) ); 6095 ins_pipe( ialu_mem_long_reg ); 6096 %} 6097 6098 // Load Stack Slot 6099 instruct loadSSP(eRegP dst, stackSlotP src) %{ 6100 match(Set dst src); 6101 ins_cost(125); 6102 6103 format %{ "MOV $dst,$src" %} 6104 opcode(0x8B); 6105 ins_encode( OpcP, RegMem(dst,src)); 6106 ins_pipe( ialu_reg_mem ); 6107 %} 6108 6109 // Load Stack Slot 6110 instruct loadSSF(regFPR dst, stackSlotF src) %{ 6111 match(Set dst src); 6112 ins_cost(125); 6113 6114 format %{ "FLD_S $src\n\t" 6115 "FSTP $dst" %} 6116 opcode(0xD9); /* D9 /0, FLD m32real */ 6117 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6118 Pop_Reg_FPR(dst) ); 6119 ins_pipe( fpu_reg_mem ); 6120 %} 6121 6122 // Load Stack Slot 6123 instruct loadSSD(regDPR dst, stackSlotD src) %{ 6124 match(Set dst src); 6125 ins_cost(125); 6126 6127 format %{ "FLD_D $src\n\t" 6128 
"FSTP $dst" %} 6129 opcode(0xDD); /* DD /0, FLD m64real */ 6130 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6131 Pop_Reg_DPR(dst) ); 6132 ins_pipe( fpu_reg_mem ); 6133 %} 6134 6135 // Prefetch instructions for allocation. 6136 // Must be safe to execute with invalid address (cannot fault). 6137 6138 instruct prefetchAlloc0( memory mem ) %{ 6139 predicate(UseSSE==0 && AllocatePrefetchInstr!=3); 6140 match(PrefetchAllocation mem); 6141 ins_cost(0); 6142 size(0); 6143 format %{ "Prefetch allocation (non-SSE is empty encoding)" %} 6144 ins_encode(); 6145 ins_pipe(empty); 6146 %} 6147 6148 instruct prefetchAlloc( memory mem ) %{ 6149 predicate(AllocatePrefetchInstr==3); 6150 match( PrefetchAllocation mem ); 6151 ins_cost(100); 6152 6153 format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %} 6154 ins_encode %{ 6155 __ prefetchw($mem$$Address); 6156 %} 6157 ins_pipe(ialu_mem); 6158 %} 6159 6160 instruct prefetchAllocNTA( memory mem ) %{ 6161 predicate(UseSSE>=1 && AllocatePrefetchInstr==0); 6162 match(PrefetchAllocation mem); 6163 ins_cost(100); 6164 6165 format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %} 6166 ins_encode %{ 6167 __ prefetchnta($mem$$Address); 6168 %} 6169 ins_pipe(ialu_mem); 6170 %} 6171 6172 instruct prefetchAllocT0( memory mem ) %{ 6173 predicate(UseSSE>=1 && AllocatePrefetchInstr==1); 6174 match(PrefetchAllocation mem); 6175 ins_cost(100); 6176 6177 format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %} 6178 ins_encode %{ 6179 __ prefetcht0($mem$$Address); 6180 %} 6181 ins_pipe(ialu_mem); 6182 %} 6183 6184 instruct prefetchAllocT2( memory mem ) %{ 6185 predicate(UseSSE>=1 && AllocatePrefetchInstr==2); 6186 match(PrefetchAllocation mem); 6187 ins_cost(100); 6188 6189 format %{ "PREFETCHT2 $mem\t! 
Prefetch allocation into L2 cache for write" %} 6190 ins_encode %{ 6191 __ prefetcht2($mem$$Address); 6192 %} 6193 ins_pipe(ialu_mem); 6194 %} 6195 6196 //----------Store Instructions------------------------------------------------- 6197 6198 // Store Byte 6199 instruct storeB(memory mem, xRegI src) %{ 6200 match(Set mem (StoreB mem src)); 6201 6202 ins_cost(125); 6203 format %{ "MOV8 $mem,$src" %} 6204 opcode(0x88); 6205 ins_encode( OpcP, RegMem( src, mem ) ); 6206 ins_pipe( ialu_mem_reg ); 6207 %} 6208 6209 // Store Char/Short 6210 instruct storeC(memory mem, rRegI src) %{ 6211 match(Set mem (StoreC mem src)); 6212 6213 ins_cost(125); 6214 format %{ "MOV16 $mem,$src" %} 6215 opcode(0x89, 0x66); 6216 ins_encode( OpcS, OpcP, RegMem( src, mem ) ); 6217 ins_pipe( ialu_mem_reg ); 6218 %} 6219 6220 // Store Integer 6221 instruct storeI(memory mem, rRegI src) %{ 6222 match(Set mem (StoreI mem src)); 6223 6224 ins_cost(125); 6225 format %{ "MOV $mem,$src" %} 6226 opcode(0x89); 6227 ins_encode( OpcP, RegMem( src, mem ) ); 6228 ins_pipe( ialu_mem_reg ); 6229 %} 6230 6231 // Store Long 6232 instruct storeL(long_memory mem, eRegL src) %{ 6233 predicate(!((StoreLNode*)n)->require_atomic_access()); 6234 match(Set mem (StoreL mem src)); 6235 6236 ins_cost(200); 6237 format %{ "MOV $mem,$src.lo\n\t" 6238 "MOV $mem+4,$src.hi" %} 6239 opcode(0x89, 0x89); 6240 ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) ); 6241 ins_pipe( ialu_mem_long_reg ); 6242 %} 6243 6244 // Store Long to Integer 6245 instruct storeL2I(memory mem, eRegL src) %{ 6246 match(Set mem (StoreI mem (ConvL2I src))); 6247 6248 format %{ "MOV $mem,$src.lo\t# long -> int" %} 6249 ins_encode %{ 6250 __ movl($mem$$Address, $src$$Register); 6251 %} 6252 ins_pipe(ialu_mem_reg); 6253 %} 6254 6255 // Volatile Store Long. Must be atomic, so move it into 6256 // the FP TOS and then do a 64-bit FIST. 
Has to probe the 6257 // target address before the store (for null-ptr checks) 6258 // so the memory operand is used twice in the encoding. 6259 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{ 6260 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access()); 6261 match(Set mem (StoreL mem src)); 6262 effect( KILL cr ); 6263 ins_cost(400); 6264 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6265 "FILD $src\n\t" 6266 "FISTp $mem\t # 64-bit atomic volatile long store" %} 6267 opcode(0x3B); 6268 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src)); 6269 ins_pipe( fpu_reg_mem ); 6270 %} 6271 6272 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{ 6273 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 6274 match(Set mem (StoreL mem src)); 6275 effect( TEMP tmp, KILL cr ); 6276 ins_cost(380); 6277 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6278 "MOVSD $tmp,$src\n\t" 6279 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 6280 ins_encode %{ 6281 __ cmpl(rax, $mem$$Address); 6282 __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp)); 6283 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6284 %} 6285 ins_pipe( pipe_slow ); 6286 %} 6287 6288 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{ 6289 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 6290 match(Set mem (StoreL mem src)); 6291 effect( TEMP tmp2 , TEMP tmp, KILL cr ); 6292 ins_cost(360); 6293 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6294 "MOVD $tmp,$src.lo\n\t" 6295 "MOVD $tmp2,$src.hi\n\t" 6296 "PUNPCKLDQ $tmp,$tmp2\n\t" 6297 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 6298 ins_encode %{ 6299 __ cmpl(rax, $mem$$Address); 6300 __ movdl($tmp$$XMMRegister, $src$$Register); 6301 __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 6302 __ punpckldq($tmp$$XMMRegister, 
$tmp2$$XMMRegister); 6303 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6304 %} 6305 ins_pipe( pipe_slow ); 6306 %} 6307 6308 // Store Pointer; for storing unknown oops and raw pointers 6309 instruct storeP(memory mem, anyRegP src) %{ 6310 match(Set mem (StoreP mem src)); 6311 6312 ins_cost(125); 6313 format %{ "MOV $mem,$src" %} 6314 opcode(0x89); 6315 ins_encode( OpcP, RegMem( src, mem ) ); 6316 ins_pipe( ialu_mem_reg ); 6317 %} 6318 6319 // Store Integer Immediate 6320 instruct storeImmI(memory mem, immI src) %{ 6321 match(Set mem (StoreI mem src)); 6322 6323 ins_cost(150); 6324 format %{ "MOV $mem,$src" %} 6325 opcode(0xC7); /* C7 /0 */ 6326 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 6327 ins_pipe( ialu_mem_imm ); 6328 %} 6329 6330 // Store Short/Char Immediate 6331 instruct storeImmI16(memory mem, immI16 src) %{ 6332 predicate(UseStoreImmI16); 6333 match(Set mem (StoreC mem src)); 6334 6335 ins_cost(150); 6336 format %{ "MOV16 $mem,$src" %} 6337 opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */ 6338 ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src )); 6339 ins_pipe( ialu_mem_imm ); 6340 %} 6341 6342 // Store Pointer Immediate; null pointers or constant oops that do not 6343 // need card-mark barriers. 
6344 instruct storeImmP(memory mem, immP src) %{ 6345 match(Set mem (StoreP mem src)); 6346 6347 ins_cost(150); 6348 format %{ "MOV $mem,$src" %} 6349 opcode(0xC7); /* C7 /0 */ 6350 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 6351 ins_pipe( ialu_mem_imm ); 6352 %} 6353 6354 // Store Byte Immediate 6355 instruct storeImmB(memory mem, immI8 src) %{ 6356 match(Set mem (StoreB mem src)); 6357 6358 ins_cost(150); 6359 format %{ "MOV8 $mem,$src" %} 6360 opcode(0xC6); /* C6 /0 */ 6361 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 6362 ins_pipe( ialu_mem_imm ); 6363 %} 6364 6365 // Store CMS card-mark Immediate 6366 instruct storeImmCM(memory mem, immI8 src) %{ 6367 match(Set mem (StoreCM mem src)); 6368 6369 ins_cost(150); 6370 format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %} 6371 opcode(0xC6); /* C6 /0 */ 6372 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 6373 ins_pipe( ialu_mem_imm ); 6374 %} 6375 6376 // Store Double 6377 instruct storeDPR( memory mem, regDPR1 src) %{ 6378 predicate(UseSSE<=1); 6379 match(Set mem (StoreD mem src)); 6380 6381 ins_cost(100); 6382 format %{ "FST_D $mem,$src" %} 6383 opcode(0xDD); /* DD /2 */ 6384 ins_encode( enc_FPR_store(mem,src) ); 6385 ins_pipe( fpu_mem_reg ); 6386 %} 6387 6388 // Store double does rounding on x86 6389 instruct storeDPR_rounded( memory mem, regDPR1 src) %{ 6390 predicate(UseSSE<=1); 6391 match(Set mem (StoreD mem (RoundDouble src))); 6392 6393 ins_cost(100); 6394 format %{ "FST_D $mem,$src\t# round" %} 6395 opcode(0xDD); /* DD /2 */ 6396 ins_encode( enc_FPR_store(mem,src) ); 6397 ins_pipe( fpu_mem_reg ); 6398 %} 6399 6400 // Store XMM register to memory (double-precision floating points) 6401 // MOVSD instruction 6402 instruct storeD(memory mem, regD src) %{ 6403 predicate(UseSSE>=2); 6404 match(Set mem (StoreD mem src)); 6405 ins_cost(95); 6406 format %{ "MOVSD $mem,$src" %} 6407 ins_encode %{ 6408 __ movdbl($mem$$Address, $src$$XMMRegister); 6409 %} 6410 ins_pipe( pipe_slow ); 6411 
%} 6412 6413 // Store XMM register to memory (single-precision floating point) 6414 // MOVSS instruction 6415 instruct storeF(memory mem, regF src) %{ 6416 predicate(UseSSE>=1); 6417 match(Set mem (StoreF mem src)); 6418 ins_cost(95); 6419 format %{ "MOVSS $mem,$src" %} 6420 ins_encode %{ 6421 __ movflt($mem$$Address, $src$$XMMRegister); 6422 %} 6423 ins_pipe( pipe_slow ); 6424 %} 6425 6426 6427 // Store Float 6428 instruct storeFPR( memory mem, regFPR1 src) %{ 6429 predicate(UseSSE==0); 6430 match(Set mem (StoreF mem src)); 6431 6432 ins_cost(100); 6433 format %{ "FST_S $mem,$src" %} 6434 opcode(0xD9); /* D9 /2 */ 6435 ins_encode( enc_FPR_store(mem,src) ); 6436 ins_pipe( fpu_mem_reg ); 6437 %} 6438 6439 // Store Float does rounding on x86 6440 instruct storeFPR_rounded( memory mem, regFPR1 src) %{ 6441 predicate(UseSSE==0); 6442 match(Set mem (StoreF mem (RoundFloat src))); 6443 6444 ins_cost(100); 6445 format %{ "FST_S $mem,$src\t# round" %} 6446 opcode(0xD9); /* D9 /2 */ 6447 ins_encode( enc_FPR_store(mem,src) ); 6448 ins_pipe( fpu_mem_reg ); 6449 %} 6450 6451 // Store Float does rounding on x86 6452 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{ 6453 predicate(UseSSE<=1); 6454 match(Set mem (StoreF mem (ConvD2F src))); 6455 6456 ins_cost(100); 6457 format %{ "FST_S $mem,$src\t# D-round" %} 6458 opcode(0xD9); /* D9 /2 */ 6459 ins_encode( enc_FPR_store(mem,src) ); 6460 ins_pipe( fpu_mem_reg ); 6461 %} 6462 6463 // Store immediate Float value (it is faster than store from FPU register) 6464 // The instruction usage is guarded by predicate in operand immFPR(). 
6465 instruct storeFPR_imm( memory mem, immFPR src) %{ 6466 match(Set mem (StoreF mem src)); 6467 6468 ins_cost(50); 6469 format %{ "MOV $mem,$src\t# store float" %} 6470 opcode(0xC7); /* C7 /0 */ 6471 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src )); 6472 ins_pipe( ialu_mem_imm ); 6473 %} 6474 6475 // Store immediate Float value (it is faster than store from XMM register) 6476 // The instruction usage is guarded by predicate in operand immF(). 6477 instruct storeF_imm( memory mem, immF src) %{ 6478 match(Set mem (StoreF mem src)); 6479 6480 ins_cost(50); 6481 format %{ "MOV $mem,$src\t# store float" %} 6482 opcode(0xC7); /* C7 /0 */ 6483 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src )); 6484 ins_pipe( ialu_mem_imm ); 6485 %} 6486 6487 // Store Integer to stack slot 6488 instruct storeSSI(stackSlotI dst, rRegI src) %{ 6489 match(Set dst src); 6490 6491 ins_cost(100); 6492 format %{ "MOV $dst,$src" %} 6493 opcode(0x89); 6494 ins_encode( OpcPRegSS( dst, src ) ); 6495 ins_pipe( ialu_mem_reg ); 6496 %} 6497 6498 // Store Integer to stack slot 6499 instruct storeSSP(stackSlotP dst, eRegP src) %{ 6500 match(Set dst src); 6501 6502 ins_cost(100); 6503 format %{ "MOV $dst,$src" %} 6504 opcode(0x89); 6505 ins_encode( OpcPRegSS( dst, src ) ); 6506 ins_pipe( ialu_mem_reg ); 6507 %} 6508 6509 // Store Long to stack slot 6510 instruct storeSSL(stackSlotL dst, eRegL src) %{ 6511 match(Set dst src); 6512 6513 ins_cost(200); 6514 format %{ "MOV $dst,$src.lo\n\t" 6515 "MOV $dst+4,$src.hi" %} 6516 opcode(0x89, 0x89); 6517 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 6518 ins_pipe( ialu_mem_long_reg ); 6519 %} 6520 6521 //----------MemBar Instructions----------------------------------------------- 6522 // Memory barrier flavors 6523 6524 instruct membar_acquire() %{ 6525 match(MemBarAcquire); 6526 match(LoadFence); 6527 ins_cost(400); 6528 6529 size(0); 6530 format %{ "MEMBAR-acquire ! 
(empty encoding)" %} 6531 ins_encode(); 6532 ins_pipe(empty); 6533 %} 6534 6535 instruct membar_acquire_lock() %{ 6536 match(MemBarAcquireLock); 6537 ins_cost(0); 6538 6539 size(0); 6540 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %} 6541 ins_encode( ); 6542 ins_pipe(empty); 6543 %} 6544 6545 instruct membar_release() %{ 6546 match(MemBarRelease); 6547 match(StoreFence); 6548 ins_cost(400); 6549 6550 size(0); 6551 format %{ "MEMBAR-release ! (empty encoding)" %} 6552 ins_encode( ); 6553 ins_pipe(empty); 6554 %} 6555 6556 instruct membar_release_lock() %{ 6557 match(MemBarReleaseLock); 6558 ins_cost(0); 6559 6560 size(0); 6561 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %} 6562 ins_encode( ); 6563 ins_pipe(empty); 6564 %} 6565 6566 instruct membar_volatile(eFlagsReg cr) %{ 6567 match(MemBarVolatile); 6568 effect(KILL cr); 6569 ins_cost(400); 6570 6571 format %{ 6572 $$template 6573 $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile" 6574 %} 6575 ins_encode %{ 6576 __ membar(Assembler::StoreLoad); 6577 %} 6578 ins_pipe(pipe_slow); 6579 %} 6580 6581 instruct unnecessary_membar_volatile() %{ 6582 match(MemBarVolatile); 6583 predicate(Matcher::post_store_load_barrier(n)); 6584 ins_cost(0); 6585 6586 size(0); 6587 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %} 6588 ins_encode( ); 6589 ins_pipe(empty); 6590 %} 6591 6592 instruct membar_storestore() %{ 6593 match(MemBarStoreStore); 6594 match(StoreStoreFence); 6595 ins_cost(0); 6596 6597 size(0); 6598 format %{ "MEMBAR-storestore (empty encoding)" %} 6599 ins_encode( ); 6600 ins_pipe(empty); 6601 %} 6602 6603 //----------Move Instructions-------------------------------------------------- 6604 instruct castX2P(eAXRegP dst, eAXRegI src) %{ 6605 match(Set dst (CastX2P src)); 6606 format %{ "# X2P $dst, $src" %} 6607 ins_encode( /*empty encoding*/ ); 6608 ins_cost(0); 6609 ins_pipe(empty); 6610 %} 6611 6612 instruct castP2X(rRegI dst, eRegP src ) %{ 6613 
match(Set dst (CastP2X src)); 6614 ins_cost(50); 6615 format %{ "MOV $dst, $src\t# CastP2X" %} 6616 ins_encode( enc_Copy( dst, src) ); 6617 ins_pipe( ialu_reg_reg ); 6618 %} 6619 6620 //----------Conditional Move--------------------------------------------------- 6621 // Conditional move 6622 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{ 6623 predicate(!VM_Version::supports_cmov() ); 6624 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6625 ins_cost(200); 6626 format %{ "J$cop,us skip\t# signed cmove\n\t" 6627 "MOV $dst,$src\n" 6628 "skip:" %} 6629 ins_encode %{ 6630 Label Lskip; 6631 // Invert sense of branch from sense of CMOV 6632 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6633 __ movl($dst$$Register, $src$$Register); 6634 __ bind(Lskip); 6635 %} 6636 ins_pipe( pipe_cmov_reg ); 6637 %} 6638 6639 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{ 6640 predicate(!VM_Version::supports_cmov() ); 6641 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6642 ins_cost(200); 6643 format %{ "J$cop,us skip\t# unsigned cmove\n\t" 6644 "MOV $dst,$src\n" 6645 "skip:" %} 6646 ins_encode %{ 6647 Label Lskip; 6648 // Invert sense of branch from sense of CMOV 6649 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6650 __ movl($dst$$Register, $src$$Register); 6651 __ bind(Lskip); 6652 %} 6653 ins_pipe( pipe_cmov_reg ); 6654 %} 6655 6656 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{ 6657 predicate(VM_Version::supports_cmov() ); 6658 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6659 ins_cost(200); 6660 format %{ "CMOV$cop $dst,$src" %} 6661 opcode(0x0F,0x40); 6662 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6663 ins_pipe( pipe_cmov_reg ); 6664 %} 6665 6666 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{ 6667 predicate(VM_Version::supports_cmov() ); 6668 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6669 ins_cost(200); 6670 
format %{ "CMOV$cop $dst,$src" %} 6671 opcode(0x0F,0x40); 6672 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6673 ins_pipe( pipe_cmov_reg ); 6674 %} 6675 6676 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{ 6677 predicate(VM_Version::supports_cmov() ); 6678 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6679 ins_cost(200); 6680 expand %{ 6681 cmovI_regU(cop, cr, dst, src); 6682 %} 6683 %} 6684 6685 // Conditional move 6686 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{ 6687 predicate(VM_Version::supports_cmov() ); 6688 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6689 ins_cost(250); 6690 format %{ "CMOV$cop $dst,$src" %} 6691 opcode(0x0F,0x40); 6692 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6693 ins_pipe( pipe_cmov_mem ); 6694 %} 6695 6696 // Conditional move 6697 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{ 6698 predicate(VM_Version::supports_cmov() ); 6699 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6700 ins_cost(250); 6701 format %{ "CMOV$cop $dst,$src" %} 6702 opcode(0x0F,0x40); 6703 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6704 ins_pipe( pipe_cmov_mem ); 6705 %} 6706 6707 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{ 6708 predicate(VM_Version::supports_cmov() ); 6709 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6710 ins_cost(250); 6711 expand %{ 6712 cmovI_memU(cop, cr, dst, src); 6713 %} 6714 %} 6715 6716 // Conditional move 6717 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6718 predicate(VM_Version::supports_cmov() ); 6719 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6720 ins_cost(200); 6721 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6722 opcode(0x0F,0x40); 6723 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6724 ins_pipe( pipe_cmov_reg ); 6725 %} 6726 6727 // Conditional move (non-P6 version) 6728 // Note: a CMoveP 
is generated for stubs and native wrappers 6729 // regardless of whether we are on a P6, so we 6730 // emulate a cmov here 6731 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6732 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6733 ins_cost(300); 6734 format %{ "Jn$cop skip\n\t" 6735 "MOV $dst,$src\t# pointer\n" 6736 "skip:" %} 6737 opcode(0x8b); 6738 ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src)); 6739 ins_pipe( pipe_cmov_reg ); 6740 %} 6741 6742 // Conditional move 6743 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{ 6744 predicate(VM_Version::supports_cmov() ); 6745 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6746 ins_cost(200); 6747 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6748 opcode(0x0F,0x40); 6749 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6750 ins_pipe( pipe_cmov_reg ); 6751 %} 6752 6753 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{ 6754 predicate(VM_Version::supports_cmov() ); 6755 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6756 ins_cost(200); 6757 expand %{ 6758 cmovP_regU(cop, cr, dst, src); 6759 %} 6760 %} 6761 6762 // DISABLED: Requires the ADLC to emit a bottom_type call that 6763 // correctly meets the two pointer arguments; one is an incoming 6764 // register but the other is a memory operand. ALSO appears to 6765 // be buggy with implicit null checks. 
6766 // 6767 //// Conditional move 6768 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{ 6769 // predicate(VM_Version::supports_cmov() ); 6770 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6771 // ins_cost(250); 6772 // format %{ "CMOV$cop $dst,$src\t# ptr" %} 6773 // opcode(0x0F,0x40); 6774 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6775 // ins_pipe( pipe_cmov_mem ); 6776 //%} 6777 // 6778 //// Conditional move 6779 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{ 6780 // predicate(VM_Version::supports_cmov() ); 6781 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6782 // ins_cost(250); 6783 // format %{ "CMOV$cop $dst,$src\t# ptr" %} 6784 // opcode(0x0F,0x40); 6785 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6786 // ins_pipe( pipe_cmov_mem ); 6787 //%} 6788 6789 // Conditional move 6790 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{ 6791 predicate(UseSSE<=1); 6792 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6793 ins_cost(200); 6794 format %{ "FCMOV$cop $dst,$src\t# double" %} 6795 opcode(0xDA); 6796 ins_encode( enc_cmov_dpr(cop,src) ); 6797 ins_pipe( pipe_cmovDPR_reg ); 6798 %} 6799 6800 // Conditional move 6801 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{ 6802 predicate(UseSSE==0); 6803 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6804 ins_cost(200); 6805 format %{ "FCMOV$cop $dst,$src\t# float" %} 6806 opcode(0xDA); 6807 ins_encode( enc_cmov_dpr(cop,src) ); 6808 ins_pipe( pipe_cmovDPR_reg ); 6809 %} 6810 6811 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 
6812 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ 6813 predicate(UseSSE<=1); 6814 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6815 ins_cost(200); 6816 format %{ "Jn$cop skip\n\t" 6817 "MOV $dst,$src\t# double\n" 6818 "skip:" %} 6819 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6820 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); 6821 ins_pipe( pipe_cmovDPR_reg ); 6822 %} 6823 6824 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6825 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ 6826 predicate(UseSSE==0); 6827 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6828 ins_cost(200); 6829 format %{ "Jn$cop skip\n\t" 6830 "MOV $dst,$src\t# float\n" 6831 "skip:" %} 6832 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6833 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); 6834 ins_pipe( pipe_cmovDPR_reg ); 6835 %} 6836 6837 // No CMOVE with SSE/SSE2 6838 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 6839 predicate (UseSSE>=1); 6840 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6841 ins_cost(200); 6842 format %{ "Jn$cop skip\n\t" 6843 "MOVSS $dst,$src\t# float\n" 6844 "skip:" %} 6845 ins_encode %{ 6846 Label skip; 6847 // Invert sense of branch from sense of CMOV 6848 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6849 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6850 __ bind(skip); 6851 %} 6852 ins_pipe( pipe_slow ); 6853 %} 6854 6855 // No CMOVE with SSE/SSE2 6856 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 6857 predicate (UseSSE>=2); 6858 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6859 ins_cost(200); 6860 format %{ "Jn$cop skip\n\t" 6861 "MOVSD $dst,$src\t# float\n" 6862 "skip:" %} 6863 ins_encode %{ 6864 Label skip; 6865 // Invert sense of branch from sense of CMOV 6866 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 
6867 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6868 __ bind(skip); 6869 %} 6870 ins_pipe( pipe_slow ); 6871 %} 6872 6873 // unsigned version 6874 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ 6875 predicate (UseSSE>=1); 6876 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6877 ins_cost(200); 6878 format %{ "Jn$cop skip\n\t" 6879 "MOVSS $dst,$src\t# float\n" 6880 "skip:" %} 6881 ins_encode %{ 6882 Label skip; 6883 // Invert sense of branch from sense of CMOV 6884 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6885 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6886 __ bind(skip); 6887 %} 6888 ins_pipe( pipe_slow ); 6889 %} 6890 6891 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ 6892 predicate (UseSSE>=1); 6893 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6894 ins_cost(200); 6895 expand %{ 6896 fcmovF_regU(cop, cr, dst, src); 6897 %} 6898 %} 6899 6900 // unsigned version 6901 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ 6902 predicate (UseSSE>=2); 6903 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6904 ins_cost(200); 6905 format %{ "Jn$cop skip\n\t" 6906 "MOVSD $dst,$src\t# float\n" 6907 "skip:" %} 6908 ins_encode %{ 6909 Label skip; 6910 // Invert sense of branch from sense of CMOV 6911 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6912 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6913 __ bind(skip); 6914 %} 6915 ins_pipe( pipe_slow ); 6916 %} 6917 6918 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ 6919 predicate (UseSSE>=2); 6920 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6921 ins_cost(200); 6922 expand %{ 6923 fcmovD_regU(cop, cr, dst, src); 6924 %} 6925 %} 6926 6927 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{ 6928 predicate(VM_Version::supports_cmov() ); 6929 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 6930 ins_cost(200); 6931 format 
%{ "CMOV$cop $dst.lo,$src.lo\n\t"
   "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same two-CMOV encoding as cmovL_reg, selected by an unsigned comparison.
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Carry-flag-only unsigned compare variant; defers to cmovL_regU.
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
// Register-register ADD; clobbers the condition flags (KILL cr).
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Register-immediate ADD; Con8or32 emits the immediate in 8-bit
// (sign-extended) or 32-bit form.
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of constant 1 as a one-byte INC (0x40+reg); gated on UseIncDec.
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA; note no KILL cr — LEA leaves flags alone.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer flavor of the LEA add above.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add of constant -1 as a one-byte DEC (0x48+reg); gated on UseIncDec.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Pointer add, register form.
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer add, immediate form.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// ADD with a memory source operand.
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write ADD directly in memory.
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Increment a memory word in place (add of constant 1).
instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// Decrement a memory word in place (add of constant -1).
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// Cast nodes are compile-time type assertions only: empty encoding, no code.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castLL( eRegL dst ) %{
  match(Set dst (CastLL dst));
  format %{
"#castLL of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// SSE register-file variants of the float/double casts.
instruct castFF( regF dst ) %{
  predicate(UseSSE >= 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castDD( regD dst ) %{
  predicate(UseSSE >= 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// FPU register-file variants, used when SSE is not enabled (UseSSE < 1/2).
instruct castFF_PR( regFPR dst ) %{
  predicate(UseSSE < 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castDD_PR( regDPR dst ) %{
  predicate(UseSSE < 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS via CMPXCHG8B; res is set to 1 on success, 0 on failure
// (see the MOV/JNE sequence in the format and enc_flags_ne_to_boolean).
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS; oldval is pinned to EAX as CMPXCHG requires.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr,
KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte-wide CAS (CMPXCHGB).
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Word-wide (16-bit) CAS (CMPXCHGW).
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// 32-bit integer CAS.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode(
enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndExchange variants: unlike CompareAndSwap they produce the value
// found in memory (Set oldval ...), not a success boolean, so there is no
// enc_flags_ne_to_boolean step.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode(
enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAdd with an unused result: a plain locked ADD is enough
// (predicate checks result_not_used() on the LoadStore node).
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDS [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDS [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAdd returning the old value: LOCK XADD leaves it in newval's register.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndSet via XCHG. No explicit lock() is emitted here; NOTE(review):
// XCHG with a memory operand is implicitly locked on x86 — see the SDM.
// Important to match to xRegI: only 8-bit regs.
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe(
ialu_reg );
%}

// SUB with a memory source operand.
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write SUB directly in memory.
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Negate, matched from the (SubI zero dst) idiom.
instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Loads a 32-bit constant into the low half of EDX:EAX only; exists as a
// helper for the mulI_imm_*_high rules below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
// The predicate walks the matched subtree (_kids) to require that the
// multiplier is a constant long whose value fits in the int range.
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE
src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Expands into two 32-bit multiplies plus an add (mulI_eReg / addI_eReg).
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
            mulI_eReg(src2, src3, cr);
            addI_eReg(dst, src2, cr); %}
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// Guards the EAX==0x80000000 / divisor==-1 combination separately (see the
// CMP/JNE/JE sequence in the format) before falling into CDQ + IDIV.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// Long division is done out of line through the SharedRuntime::ldiv stub.
instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (DivL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
// One IDIV yields both results: quotient in the EAX operand, remainder in EDX.
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (ModL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16"
  %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
// 64-bit divide by a 32-bit constant, done inline with two unsigned 32-bit
// DIVs; the sign is handled by negating before and after (lneg / final NEG).
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    // Divisors 0, -1 and min_jint never reach this rule (see instruct comment).
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
// Same two-DIV scheme as divL_eReg_imm32; only the remainder (in EDX) is
// kept and sign-corrected at the end.
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    // Divisors 0, -1 and min_jint never reach this rule (see instruct comment).
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ?
con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
// Uses the short D1 /4 encoding reserved for a shift count of 1.
instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
instruct
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7929 match(Set dst (LShiftI dst shift)); 7930 effect(KILL cr); 7931 7932 size(2); 7933 format %{ "SHL $dst,$shift" %} 7934 opcode(0xD3, 0x4); /* D3 /4 */ 7935 ins_encode( OpcP, RegOpc( dst ) ); 7936 ins_pipe( ialu_reg_reg ); 7937 %} 7938 7939 // Arithmetic shift right by one 7940 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 7941 match(Set dst (RShiftI dst shift)); 7942 effect(KILL cr); 7943 7944 size(2); 7945 format %{ "SAR $dst,$shift" %} 7946 opcode(0xD1, 0x7); /* D1 /7 */ 7947 ins_encode( OpcP, RegOpc( dst ) ); 7948 ins_pipe( ialu_reg ); 7949 %} 7950 7951 // Arithmetic shift right by one 7952 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{ 7953 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7954 effect(KILL cr); 7955 format %{ "SAR $dst,$shift" %} 7956 opcode(0xD1, 0x7); /* D1 /7 */ 7957 ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 7958 ins_pipe( ialu_mem_imm ); 7959 %} 7960 7961 // Arithmetic Shift Right by 8-bit immediate 7962 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7963 match(Set dst (RShiftI dst shift)); 7964 effect(KILL cr); 7965 7966 size(3); 7967 format %{ "SAR $dst,$shift" %} 7968 opcode(0xC1, 0x7); /* C1 /7 ib */ 7969 ins_encode( RegOpcImm( dst, shift ) ); 7970 ins_pipe( ialu_mem_imm ); 7971 %} 7972 7973 // Arithmetic Shift Right by 8-bit immediate 7974 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 7975 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7976 effect(KILL cr); 7977 7978 format %{ "SAR $dst,$shift" %} 7979 opcode(0xC1, 0x7); /* C1 /7 ib */ 7980 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 7981 ins_pipe( ialu_mem_imm ); 7982 %} 7983 7984 // Arithmetic Shift Right by variable 7985 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7986 match(Set dst (RShiftI dst shift)); 7987 effect(KILL cr); 7988 7989 size(2); 7990 format %{ "SAR $dst,$shift" %} 7991 
  opcode(0xD3, 0x7);  /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5);  /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
// Matched as a single MOVSX (sign-extend byte to dword).
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
8036 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{ 8037 match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); 8038 8039 size(3); 8040 format %{ "MOVSX $dst,$src :16" %} 8041 ins_encode %{ 8042 __ movswl($dst$$Register, $src$$Register); 8043 %} 8044 ins_pipe(ialu_reg_reg); 8045 %} 8046 8047 8048 // Logical Shift Right by variable 8049 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8050 match(Set dst (URShiftI dst shift)); 8051 effect(KILL cr); 8052 8053 size(2); 8054 format %{ "SHR $dst,$shift" %} 8055 opcode(0xD3, 0x5); /* D3 /5 */ 8056 ins_encode( OpcP, RegOpc( dst ) ); 8057 ins_pipe( ialu_reg_reg ); 8058 %} 8059 8060 8061 //----------Logical Instructions----------------------------------------------- 8062 //----------Integer Logical Instructions--------------------------------------- 8063 // And Instructions 8064 // And Register with Register 8065 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8066 match(Set dst (AndI dst src)); 8067 effect(KILL cr); 8068 8069 size(2); 8070 format %{ "AND $dst,$src" %} 8071 opcode(0x23); 8072 ins_encode( OpcP, RegReg( dst, src) ); 8073 ins_pipe( ialu_reg_reg ); 8074 %} 8075 8076 // And Register with Immediate 8077 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8078 match(Set dst (AndI dst src)); 8079 effect(KILL cr); 8080 8081 format %{ "AND $dst,$src" %} 8082 opcode(0x81,0x04); /* Opcode 81 /4 */ 8083 // ins_encode( RegImm( dst, src) ); 8084 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8085 ins_pipe( ialu_reg ); 8086 %} 8087 8088 // And Register with Memory 8089 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8090 match(Set dst (AndI dst (LoadI src))); 8091 effect(KILL cr); 8092 8093 ins_cost(150); 8094 format %{ "AND $dst,$src" %} 8095 opcode(0x23); 8096 ins_encode( OpcP, RegMem( dst, src) ); 8097 ins_pipe( ialu_reg_mem ); 8098 %} 8099 8100 // And Memory with Register 8101 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8102 match(Set dst 
(StoreI dst (AndI (LoadI dst) src))); 8103 effect(KILL cr); 8104 8105 ins_cost(150); 8106 format %{ "AND $dst,$src" %} 8107 opcode(0x21); /* Opcode 21 /r */ 8108 ins_encode( OpcP, RegMem( src, dst ) ); 8109 ins_pipe( ialu_mem_reg ); 8110 %} 8111 8112 // And Memory with Immediate 8113 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8114 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8115 effect(KILL cr); 8116 8117 ins_cost(125); 8118 format %{ "AND $dst,$src" %} 8119 opcode(0x81, 0x4); /* Opcode 81 /4 id */ 8120 // ins_encode( MemImm( dst, src) ); 8121 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8122 ins_pipe( ialu_mem_imm ); 8123 %} 8124 8125 // BMI1 instructions 8126 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ 8127 match(Set dst (AndI (XorI src1 minus_1) src2)); 8128 predicate(UseBMI1Instructions); 8129 effect(KILL cr); 8130 8131 format %{ "ANDNL $dst, $src1, $src2" %} 8132 8133 ins_encode %{ 8134 __ andnl($dst$$Register, $src1$$Register, $src2$$Register); 8135 %} 8136 ins_pipe(ialu_reg); 8137 %} 8138 8139 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ 8140 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) )); 8141 predicate(UseBMI1Instructions); 8142 effect(KILL cr); 8143 8144 ins_cost(125); 8145 format %{ "ANDNL $dst, $src1, $src2" %} 8146 8147 ins_encode %{ 8148 __ andnl($dst$$Register, $src1$$Register, $src2$$Address); 8149 %} 8150 ins_pipe(ialu_reg_mem); 8151 %} 8152 8153 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{ 8154 match(Set dst (AndI (SubI imm_zero src) src)); 8155 predicate(UseBMI1Instructions); 8156 effect(KILL cr); 8157 8158 format %{ "BLSIL $dst, $src" %} 8159 8160 ins_encode %{ 8161 __ blsil($dst$$Register, $src$$Register); 8162 %} 8163 ins_pipe(ialu_reg); 8164 %} 8165 8166 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{ 8167 
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); 8168 predicate(UseBMI1Instructions); 8169 effect(KILL cr); 8170 8171 ins_cost(125); 8172 format %{ "BLSIL $dst, $src" %} 8173 8174 ins_encode %{ 8175 __ blsil($dst$$Register, $src$$Address); 8176 %} 8177 ins_pipe(ialu_reg_mem); 8178 %} 8179 8180 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8181 %{ 8182 match(Set dst (XorI (AddI src minus_1) src)); 8183 predicate(UseBMI1Instructions); 8184 effect(KILL cr); 8185 8186 format %{ "BLSMSKL $dst, $src" %} 8187 8188 ins_encode %{ 8189 __ blsmskl($dst$$Register, $src$$Register); 8190 %} 8191 8192 ins_pipe(ialu_reg); 8193 %} 8194 8195 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8196 %{ 8197 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); 8198 predicate(UseBMI1Instructions); 8199 effect(KILL cr); 8200 8201 ins_cost(125); 8202 format %{ "BLSMSKL $dst, $src" %} 8203 8204 ins_encode %{ 8205 __ blsmskl($dst$$Register, $src$$Address); 8206 %} 8207 8208 ins_pipe(ialu_reg_mem); 8209 %} 8210 8211 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8212 %{ 8213 match(Set dst (AndI (AddI src minus_1) src) ); 8214 predicate(UseBMI1Instructions); 8215 effect(KILL cr); 8216 8217 format %{ "BLSRL $dst, $src" %} 8218 8219 ins_encode %{ 8220 __ blsrl($dst$$Register, $src$$Register); 8221 %} 8222 8223 ins_pipe(ialu_reg); 8224 %} 8225 8226 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8227 %{ 8228 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); 8229 predicate(UseBMI1Instructions); 8230 effect(KILL cr); 8231 8232 ins_cost(125); 8233 format %{ "BLSRL $dst, $src" %} 8234 8235 ins_encode %{ 8236 __ blsrl($dst$$Register, $src$$Address); 8237 %} 8238 8239 ins_pipe(ialu_reg_mem); 8240 %} 8241 8242 // Or Instructions 8243 // Or Register with Register 8244 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8245 match(Set dst 
(OrI dst src)); 8246 effect(KILL cr); 8247 8248 size(2); 8249 format %{ "OR $dst,$src" %} 8250 opcode(0x0B); 8251 ins_encode( OpcP, RegReg( dst, src) ); 8252 ins_pipe( ialu_reg_reg ); 8253 %} 8254 8255 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8256 match(Set dst (OrI dst (CastP2X src))); 8257 effect(KILL cr); 8258 8259 size(2); 8260 format %{ "OR $dst,$src" %} 8261 opcode(0x0B); 8262 ins_encode( OpcP, RegReg( dst, src) ); 8263 ins_pipe( ialu_reg_reg ); 8264 %} 8265 8266 8267 // Or Register with Immediate 8268 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8269 match(Set dst (OrI dst src)); 8270 effect(KILL cr); 8271 8272 format %{ "OR $dst,$src" %} 8273 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8274 // ins_encode( RegImm( dst, src) ); 8275 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8276 ins_pipe( ialu_reg ); 8277 %} 8278 8279 // Or Register with Memory 8280 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8281 match(Set dst (OrI dst (LoadI src))); 8282 effect(KILL cr); 8283 8284 ins_cost(150); 8285 format %{ "OR $dst,$src" %} 8286 opcode(0x0B); 8287 ins_encode( OpcP, RegMem( dst, src) ); 8288 ins_pipe( ialu_reg_mem ); 8289 %} 8290 8291 // Or Memory with Register 8292 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8293 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8294 effect(KILL cr); 8295 8296 ins_cost(150); 8297 format %{ "OR $dst,$src" %} 8298 opcode(0x09); /* Opcode 09 /r */ 8299 ins_encode( OpcP, RegMem( src, dst ) ); 8300 ins_pipe( ialu_mem_reg ); 8301 %} 8302 8303 // Or Memory with Immediate 8304 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8305 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8306 effect(KILL cr); 8307 8308 ins_cost(125); 8309 format %{ "OR $dst,$src" %} 8310 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8311 // ins_encode( MemImm( dst, src) ); 8312 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8313 ins_pipe( ialu_mem_imm ); 
8314 %} 8315 8316 // ROL/ROR 8317 // ROL expand 8318 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8319 effect(USE_DEF dst, USE shift, KILL cr); 8320 8321 format %{ "ROL $dst, $shift" %} 8322 opcode(0xD1, 0x0); /* Opcode D1 /0 */ 8323 ins_encode( OpcP, RegOpc( dst )); 8324 ins_pipe( ialu_reg ); 8325 %} 8326 8327 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8328 effect(USE_DEF dst, USE shift, KILL cr); 8329 8330 format %{ "ROL $dst, $shift" %} 8331 opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 8332 ins_encode( RegOpcImm(dst, shift) ); 8333 ins_pipe(ialu_reg); 8334 %} 8335 8336 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8337 effect(USE_DEF dst, USE shift, KILL cr); 8338 8339 format %{ "ROL $dst, $shift" %} 8340 opcode(0xD3, 0x0); /* Opcode D3 /0 */ 8341 ins_encode(OpcP, RegOpc(dst)); 8342 ins_pipe( ialu_reg_reg ); 8343 %} 8344 // end of ROL expand 8345 8346 // ROL 32bit by one once 8347 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 8348 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8349 8350 expand %{ 8351 rolI_eReg_imm1(dst, lshift, cr); 8352 %} 8353 %} 8354 8355 // ROL 32bit var by imm8 once 8356 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 8357 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8358 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8359 8360 expand %{ 8361 rolI_eReg_imm8(dst, lshift, cr); 8362 %} 8363 %} 8364 8365 // ROL 32bit var by var once 8366 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ 8367 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 8368 8369 expand %{ 8370 rolI_eReg_CL(dst, shift, cr); 8371 %} 8372 %} 8373 8374 // ROL 32bit var by var once 8375 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8376 match(Set dst ( OrI (LShiftI dst shift) 
(URShiftI dst (SubI c32 shift)))); 8377 8378 expand %{ 8379 rolI_eReg_CL(dst, shift, cr); 8380 %} 8381 %} 8382 8383 // ROR expand 8384 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8385 effect(USE_DEF dst, USE shift, KILL cr); 8386 8387 format %{ "ROR $dst, $shift" %} 8388 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8389 ins_encode( OpcP, RegOpc( dst ) ); 8390 ins_pipe( ialu_reg ); 8391 %} 8392 8393 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8394 effect (USE_DEF dst, USE shift, KILL cr); 8395 8396 format %{ "ROR $dst, $shift" %} 8397 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8398 ins_encode( RegOpcImm(dst, shift) ); 8399 ins_pipe( ialu_reg ); 8400 %} 8401 8402 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8403 effect(USE_DEF dst, USE shift, KILL cr); 8404 8405 format %{ "ROR $dst, $shift" %} 8406 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8407 ins_encode(OpcP, RegOpc(dst)); 8408 ins_pipe( ialu_reg_reg ); 8409 %} 8410 // end of ROR expand 8411 8412 // ROR right once 8413 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8414 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8415 8416 expand %{ 8417 rorI_eReg_imm1(dst, rshift, cr); 8418 %} 8419 %} 8420 8421 // ROR 32bit by immI8 once 8422 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8423 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8424 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8425 8426 expand %{ 8427 rorI_eReg_imm8(dst, rshift, cr); 8428 %} 8429 %} 8430 8431 // ROR 32bit var by var once 8432 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ 8433 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8434 8435 expand %{ 8436 rorI_eReg_CL(dst, shift, cr); 8437 %} 8438 %} 8439 8440 // ROR 32bit var by var once 8441 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, 
immI_32 c32, eFlagsReg cr) %{ 8442 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8443 8444 expand %{ 8445 rorI_eReg_CL(dst, shift, cr); 8446 %} 8447 %} 8448 8449 // Xor Instructions 8450 // Xor Register with Register 8451 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8452 match(Set dst (XorI dst src)); 8453 effect(KILL cr); 8454 8455 size(2); 8456 format %{ "XOR $dst,$src" %} 8457 opcode(0x33); 8458 ins_encode( OpcP, RegReg( dst, src) ); 8459 ins_pipe( ialu_reg_reg ); 8460 %} 8461 8462 // Xor Register with Immediate -1 8463 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8464 match(Set dst (XorI dst imm)); 8465 8466 size(2); 8467 format %{ "NOT $dst" %} 8468 ins_encode %{ 8469 __ notl($dst$$Register); 8470 %} 8471 ins_pipe( ialu_reg ); 8472 %} 8473 8474 // Xor Register with Immediate 8475 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8476 match(Set dst (XorI dst src)); 8477 effect(KILL cr); 8478 8479 format %{ "XOR $dst,$src" %} 8480 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8481 // ins_encode( RegImm( dst, src) ); 8482 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8483 ins_pipe( ialu_reg ); 8484 %} 8485 8486 // Xor Register with Memory 8487 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8488 match(Set dst (XorI dst (LoadI src))); 8489 effect(KILL cr); 8490 8491 ins_cost(150); 8492 format %{ "XOR $dst,$src" %} 8493 opcode(0x33); 8494 ins_encode( OpcP, RegMem(dst, src) ); 8495 ins_pipe( ialu_reg_mem ); 8496 %} 8497 8498 // Xor Memory with Register 8499 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8500 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8501 effect(KILL cr); 8502 8503 ins_cost(150); 8504 format %{ "XOR $dst,$src" %} 8505 opcode(0x31); /* Opcode 31 /r */ 8506 ins_encode( OpcP, RegMem( src, dst ) ); 8507 ins_pipe( ialu_mem_reg ); 8508 %} 8509 8510 // Xor Memory with Immediate 8511 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8512 match(Set 
dst (StoreI dst (XorI (LoadI dst) src))); 8513 effect(KILL cr); 8514 8515 ins_cost(125); 8516 format %{ "XOR $dst,$src" %} 8517 opcode(0x81,0x6); /* Opcode 81 /6 id */ 8518 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8519 ins_pipe( ialu_mem_imm ); 8520 %} 8521 8522 //----------Convert Int to Boolean--------------------------------------------- 8523 8524 instruct movI_nocopy(rRegI dst, rRegI src) %{ 8525 effect( DEF dst, USE src ); 8526 format %{ "MOV $dst,$src" %} 8527 ins_encode( enc_Copy( dst, src) ); 8528 ins_pipe( ialu_reg_reg ); 8529 %} 8530 8531 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8532 effect( USE_DEF dst, USE src, KILL cr ); 8533 8534 size(4); 8535 format %{ "NEG $dst\n\t" 8536 "ADC $dst,$src" %} 8537 ins_encode( neg_reg(dst), 8538 OpcRegReg(0x13,dst,src) ); 8539 ins_pipe( ialu_reg_reg_long ); 8540 %} 8541 8542 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8543 match(Set dst (Conv2B src)); 8544 8545 expand %{ 8546 movI_nocopy(dst,src); 8547 ci2b(dst,src,cr); 8548 %} 8549 %} 8550 8551 instruct movP_nocopy(rRegI dst, eRegP src) %{ 8552 effect( DEF dst, USE src ); 8553 format %{ "MOV $dst,$src" %} 8554 ins_encode( enc_Copy( dst, src) ); 8555 ins_pipe( ialu_reg_reg ); 8556 %} 8557 8558 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8559 effect( USE_DEF dst, USE src, KILL cr ); 8560 format %{ "NEG $dst\n\t" 8561 "ADC $dst,$src" %} 8562 ins_encode( neg_reg(dst), 8563 OpcRegReg(0x13,dst,src) ); 8564 ins_pipe( ialu_reg_reg_long ); 8565 %} 8566 8567 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8568 match(Set dst (Conv2B src)); 8569 8570 expand %{ 8571 movP_nocopy(dst,src); 8572 cp2b(dst,src,cr); 8573 %} 8574 %} 8575 8576 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ 8577 match(Set dst (CmpLTMask p q)); 8578 effect(KILL cr); 8579 ins_cost(400); 8580 8581 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination 8582 format %{ "XOR $dst,$dst\n\t" 8583 
"CMP $p,$q\n\t" 8584 "SETlt $dst\n\t" 8585 "NEG $dst" %} 8586 ins_encode %{ 8587 Register Rp = $p$$Register; 8588 Register Rq = $q$$Register; 8589 Register Rd = $dst$$Register; 8590 Label done; 8591 __ xorl(Rd, Rd); 8592 __ cmpl(Rp, Rq); 8593 __ setb(Assembler::less, Rd); 8594 __ negl(Rd); 8595 %} 8596 8597 ins_pipe(pipe_slow); 8598 %} 8599 8600 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{ 8601 match(Set dst (CmpLTMask dst zero)); 8602 effect(DEF dst, KILL cr); 8603 ins_cost(100); 8604 8605 format %{ "SAR $dst,31\t# cmpLTMask0" %} 8606 ins_encode %{ 8607 __ sarl($dst$$Register, 31); 8608 %} 8609 ins_pipe(ialu_reg); 8610 %} 8611 8612 /* better to save a register than avoid a branch */ 8613 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8614 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); 8615 effect(KILL cr); 8616 ins_cost(400); 8617 format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t" 8618 "JGE done\n\t" 8619 "ADD $p,$y\n" 8620 "done: " %} 8621 ins_encode %{ 8622 Register Rp = $p$$Register; 8623 Register Rq = $q$$Register; 8624 Register Ry = $y$$Register; 8625 Label done; 8626 __ subl(Rp, Rq); 8627 __ jccb(Assembler::greaterEqual, done); 8628 __ addl(Rp, Ry); 8629 __ bind(done); 8630 %} 8631 8632 ins_pipe(pipe_cmplt); 8633 %} 8634 8635 /* better to save a register than avoid a branch */ 8636 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8637 match(Set y (AndI (CmpLTMask p q) y)); 8638 effect(KILL cr); 8639 8640 ins_cost(300); 8641 8642 format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t" 8643 "JLT done\n\t" 8644 "XORL $y, $y\n" 8645 "done: " %} 8646 ins_encode %{ 8647 Register Rp = $p$$Register; 8648 Register Rq = $q$$Register; 8649 Register Ry = $y$$Register; 8650 Label done; 8651 __ cmpl(Rp, Rq); 8652 __ jccb(Assembler::less, done); 8653 __ xorl(Ry, Ry); 8654 __ bind(done); 8655 %} 8656 8657 ins_pipe(pipe_cmplt); 8658 %} 8659 8660 /* If I enable this, I encourage spilling in the inner loop of compress. 
8661 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ 8662 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); 8663 */ 8664 //----------Overflow Math Instructions----------------------------------------- 8665 8666 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8667 %{ 8668 match(Set cr (OverflowAddI op1 op2)); 8669 effect(DEF cr, USE_KILL op1, USE op2); 8670 8671 format %{ "ADD $op1, $op2\t# overflow check int" %} 8672 8673 ins_encode %{ 8674 __ addl($op1$$Register, $op2$$Register); 8675 %} 8676 ins_pipe(ialu_reg_reg); 8677 %} 8678 8679 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) 8680 %{ 8681 match(Set cr (OverflowAddI op1 op2)); 8682 effect(DEF cr, USE_KILL op1, USE op2); 8683 8684 format %{ "ADD $op1, $op2\t# overflow check int" %} 8685 8686 ins_encode %{ 8687 __ addl($op1$$Register, $op2$$constant); 8688 %} 8689 ins_pipe(ialu_reg_reg); 8690 %} 8691 8692 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) 8693 %{ 8694 match(Set cr (OverflowSubI op1 op2)); 8695 8696 format %{ "CMP $op1, $op2\t# overflow check int" %} 8697 ins_encode %{ 8698 __ cmpl($op1$$Register, $op2$$Register); 8699 %} 8700 ins_pipe(ialu_reg_reg); 8701 %} 8702 8703 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) 8704 %{ 8705 match(Set cr (OverflowSubI op1 op2)); 8706 8707 format %{ "CMP $op1, $op2\t# overflow check int" %} 8708 ins_encode %{ 8709 __ cmpl($op1$$Register, $op2$$constant); 8710 %} 8711 ins_pipe(ialu_reg_reg); 8712 %} 8713 8714 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2) 8715 %{ 8716 match(Set cr (OverflowSubI zero op2)); 8717 effect(DEF cr, USE_KILL op2); 8718 8719 format %{ "NEG $op2\t# overflow check int" %} 8720 ins_encode %{ 8721 __ negl($op2$$Register); 8722 %} 8723 ins_pipe(ialu_reg_reg); 8724 %} 8725 8726 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8727 %{ 8728 match(Set cr (OverflowMulI op1 op2)); 8729 
effect(DEF cr, USE_KILL op1, USE op2); 8730 8731 format %{ "IMUL $op1, $op2\t# overflow check int" %} 8732 ins_encode %{ 8733 __ imull($op1$$Register, $op2$$Register); 8734 %} 8735 ins_pipe(ialu_reg_reg_alu0); 8736 %} 8737 8738 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) 8739 %{ 8740 match(Set cr (OverflowMulI op1 op2)); 8741 effect(DEF cr, TEMP tmp, USE op1, USE op2); 8742 8743 format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} 8744 ins_encode %{ 8745 __ imull($tmp$$Register, $op1$$Register, $op2$$constant); 8746 %} 8747 ins_pipe(ialu_reg_reg_alu0); 8748 %} 8749 8750 // Integer Absolute Instructions 8751 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr) 8752 %{ 8753 match(Set dst (AbsI src)); 8754 effect(TEMP dst, TEMP tmp, KILL cr); 8755 format %{ "movl $tmp, $src\n\t" 8756 "sarl $tmp, 31\n\t" 8757 "movl $dst, $src\n\t" 8758 "xorl $dst, $tmp\n\t" 8759 "subl $dst, $tmp\n" 8760 %} 8761 ins_encode %{ 8762 __ movl($tmp$$Register, $src$$Register); 8763 __ sarl($tmp$$Register, 31); 8764 __ movl($dst$$Register, $src$$Register); 8765 __ xorl($dst$$Register, $tmp$$Register); 8766 __ subl($dst$$Register, $tmp$$Register); 8767 %} 8768 8769 ins_pipe(ialu_reg_reg); 8770 %} 8771 8772 //----------Long Instructions------------------------------------------------ 8773 // Add Long Register with Register 8774 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8775 match(Set dst (AddL dst src)); 8776 effect(KILL cr); 8777 ins_cost(200); 8778 format %{ "ADD $dst.lo,$src.lo\n\t" 8779 "ADC $dst.hi,$src.hi" %} 8780 opcode(0x03, 0x13); 8781 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8782 ins_pipe( ialu_reg_reg_long ); 8783 %} 8784 8785 // Add Long Register with Immediate 8786 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8787 match(Set dst (AddL dst src)); 8788 effect(KILL cr); 8789 format %{ "ADD $dst.lo,$src.lo\n\t" 8790 "ADC $dst.hi,$src.hi" %} 8791 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 
*/ 8792 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8793 ins_pipe( ialu_reg_long ); 8794 %} 8795 8796 // Add Long Register with Memory 8797 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8798 match(Set dst (AddL dst (LoadL mem))); 8799 effect(KILL cr); 8800 ins_cost(125); 8801 format %{ "ADD $dst.lo,$mem\n\t" 8802 "ADC $dst.hi,$mem+4" %} 8803 opcode(0x03, 0x13); 8804 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8805 ins_pipe( ialu_reg_long_mem ); 8806 %} 8807 8808 // Subtract Long Register with Register. 8809 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8810 match(Set dst (SubL dst src)); 8811 effect(KILL cr); 8812 ins_cost(200); 8813 format %{ "SUB $dst.lo,$src.lo\n\t" 8814 "SBB $dst.hi,$src.hi" %} 8815 opcode(0x2B, 0x1B); 8816 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8817 ins_pipe( ialu_reg_reg_long ); 8818 %} 8819 8820 // Subtract Long Register with Immediate 8821 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8822 match(Set dst (SubL dst src)); 8823 effect(KILL cr); 8824 format %{ "SUB $dst.lo,$src.lo\n\t" 8825 "SBB $dst.hi,$src.hi" %} 8826 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8827 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8828 ins_pipe( ialu_reg_long ); 8829 %} 8830 8831 // Subtract Long Register with Memory 8832 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8833 match(Set dst (SubL dst (LoadL mem))); 8834 effect(KILL cr); 8835 ins_cost(125); 8836 format %{ "SUB $dst.lo,$mem\n\t" 8837 "SBB $dst.hi,$mem+4" %} 8838 opcode(0x2B, 0x1B); 8839 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8840 ins_pipe( ialu_reg_long_mem ); 8841 %} 8842 8843 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8844 match(Set dst (SubL zero dst)); 8845 effect(KILL cr); 8846 ins_cost(300); 8847 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8848 ins_encode( 
neg_long(dst) ); 8849 ins_pipe( ialu_reg_reg_long ); 8850 %} 8851 8852 // And Long Register with Register 8853 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8854 match(Set dst (AndL dst src)); 8855 effect(KILL cr); 8856 format %{ "AND $dst.lo,$src.lo\n\t" 8857 "AND $dst.hi,$src.hi" %} 8858 opcode(0x23,0x23); 8859 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8860 ins_pipe( ialu_reg_reg_long ); 8861 %} 8862 8863 // And Long Register with Immediate 8864 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8865 match(Set dst (AndL dst src)); 8866 effect(KILL cr); 8867 format %{ "AND $dst.lo,$src.lo\n\t" 8868 "AND $dst.hi,$src.hi" %} 8869 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8870 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8871 ins_pipe( ialu_reg_long ); 8872 %} 8873 8874 // And Long Register with Memory 8875 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8876 match(Set dst (AndL dst (LoadL mem))); 8877 effect(KILL cr); 8878 ins_cost(125); 8879 format %{ "AND $dst.lo,$mem\n\t" 8880 "AND $dst.hi,$mem+4" %} 8881 opcode(0x23, 0x23); 8882 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8883 ins_pipe( ialu_reg_long_mem ); 8884 %} 8885 8886 // BMI1 instructions 8887 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 8888 match(Set dst (AndL (XorL src1 minus_1) src2)); 8889 predicate(UseBMI1Instructions); 8890 effect(KILL cr, TEMP dst); 8891 8892 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 8893 "ANDNL $dst.hi, $src1.hi, $src2.hi" 8894 %} 8895 8896 ins_encode %{ 8897 Register Rdst = $dst$$Register; 8898 Register Rsrc1 = $src1$$Register; 8899 Register Rsrc2 = $src2$$Register; 8900 __ andnl(Rdst, Rsrc1, Rsrc2); 8901 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 8902 %} 8903 ins_pipe(ialu_reg_reg_long); 8904 %} 8905 8906 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, 
immL_M1 minus_1, eFlagsReg cr) %{ 8907 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 8908 predicate(UseBMI1Instructions); 8909 effect(KILL cr, TEMP dst); 8910 8911 ins_cost(125); 8912 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 8913 "ANDNL $dst.hi, $src1.hi, $src2+4" 8914 %} 8915 8916 ins_encode %{ 8917 Register Rdst = $dst$$Register; 8918 Register Rsrc1 = $src1$$Register; 8919 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 8920 8921 __ andnl(Rdst, Rsrc1, $src2$$Address); 8922 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); 8923 %} 8924 ins_pipe(ialu_reg_mem); 8925 %} 8926 8927 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 8928 match(Set dst (AndL (SubL imm_zero src) src)); 8929 predicate(UseBMI1Instructions); 8930 effect(KILL cr, TEMP dst); 8931 8932 format %{ "MOVL $dst.hi, 0\n\t" 8933 "BLSIL $dst.lo, $src.lo\n\t" 8934 "JNZ done\n\t" 8935 "BLSIL $dst.hi, $src.hi\n" 8936 "done:" 8937 %} 8938 8939 ins_encode %{ 8940 Label done; 8941 Register Rdst = $dst$$Register; 8942 Register Rsrc = $src$$Register; 8943 __ movl(HIGH_FROM_LOW(Rdst), 0); 8944 __ blsil(Rdst, Rsrc); 8945 __ jccb(Assembler::notZero, done); 8946 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8947 __ bind(done); 8948 %} 8949 ins_pipe(ialu_reg); 8950 %} 8951 8952 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 8953 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 8954 predicate(UseBMI1Instructions); 8955 effect(KILL cr, TEMP dst); 8956 8957 ins_cost(125); 8958 format %{ "MOVL $dst.hi, 0\n\t" 8959 "BLSIL $dst.lo, $src\n\t" 8960 "JNZ done\n\t" 8961 "BLSIL $dst.hi, $src+4\n" 8962 "done:" 8963 %} 8964 8965 ins_encode %{ 8966 Label done; 8967 Register Rdst = $dst$$Register; 8968 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8969 8970 __ movl(HIGH_FROM_LOW(Rdst), 0); 8971 
__ blsil(Rdst, $src$$Address); 8972 __ jccb(Assembler::notZero, done); 8973 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 8974 __ bind(done); 8975 %} 8976 ins_pipe(ialu_reg_mem); 8977 %} 8978 8979 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8980 %{ 8981 match(Set dst (XorL (AddL src minus_1) src)); 8982 predicate(UseBMI1Instructions); 8983 effect(KILL cr, TEMP dst); 8984 8985 format %{ "MOVL $dst.hi, 0\n\t" 8986 "BLSMSKL $dst.lo, $src.lo\n\t" 8987 "JNC done\n\t" 8988 "BLSMSKL $dst.hi, $src.hi\n" 8989 "done:" 8990 %} 8991 8992 ins_encode %{ 8993 Label done; 8994 Register Rdst = $dst$$Register; 8995 Register Rsrc = $src$$Register; 8996 __ movl(HIGH_FROM_LOW(Rdst), 0); 8997 __ blsmskl(Rdst, Rsrc); 8998 __ jccb(Assembler::carryClear, done); 8999 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9000 __ bind(done); 9001 %} 9002 9003 ins_pipe(ialu_reg); 9004 %} 9005 9006 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9007 %{ 9008 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 9009 predicate(UseBMI1Instructions); 9010 effect(KILL cr, TEMP dst); 9011 9012 ins_cost(125); 9013 format %{ "MOVL $dst.hi, 0\n\t" 9014 "BLSMSKL $dst.lo, $src\n\t" 9015 "JNC done\n\t" 9016 "BLSMSKL $dst.hi, $src+4\n" 9017 "done:" 9018 %} 9019 9020 ins_encode %{ 9021 Label done; 9022 Register Rdst = $dst$$Register; 9023 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9024 9025 __ movl(HIGH_FROM_LOW(Rdst), 0); 9026 __ blsmskl(Rdst, $src$$Address); 9027 __ jccb(Assembler::carryClear, done); 9028 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 9029 __ bind(done); 9030 %} 9031 9032 ins_pipe(ialu_reg_mem); 9033 %} 9034 9035 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9036 %{ 9037 match(Set dst (AndL (AddL src minus_1) src) ); 9038 predicate(UseBMI1Instructions); 9039 effect(KILL cr, TEMP dst); 9040 9041 format %{ "MOVL $dst.hi, $src.hi\n\t" 
9042 "BLSRL $dst.lo, $src.lo\n\t" 9043 "JNC done\n\t" 9044 "BLSRL $dst.hi, $src.hi\n" 9045 "done:" 9046 %} 9047 9048 ins_encode %{ 9049 Label done; 9050 Register Rdst = $dst$$Register; 9051 Register Rsrc = $src$$Register; 9052 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9053 __ blsrl(Rdst, Rsrc); 9054 __ jccb(Assembler::carryClear, done); 9055 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9056 __ bind(done); 9057 %} 9058 9059 ins_pipe(ialu_reg); 9060 %} 9061 9062 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9063 %{ 9064 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); 9065 predicate(UseBMI1Instructions); 9066 effect(KILL cr, TEMP dst); 9067 9068 ins_cost(125); 9069 format %{ "MOVL $dst.hi, $src+4\n\t" 9070 "BLSRL $dst.lo, $src\n\t" 9071 "JNC done\n\t" 9072 "BLSRL $dst.hi, $src+4\n" 9073 "done:" 9074 %} 9075 9076 ins_encode %{ 9077 Label done; 9078 Register Rdst = $dst$$Register; 9079 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9080 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9081 __ blsrl(Rdst, $src$$Address); 9082 __ jccb(Assembler::carryClear, done); 9083 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9084 __ bind(done); 9085 %} 9086 9087 ins_pipe(ialu_reg_mem); 9088 %} 9089 9090 // Or Long Register with Register 9091 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9092 match(Set dst (OrL dst src)); 9093 effect(KILL cr); 9094 format %{ "OR $dst.lo,$src.lo\n\t" 9095 "OR $dst.hi,$src.hi" %} 9096 opcode(0x0B,0x0B); 9097 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9098 ins_pipe( ialu_reg_reg_long ); 9099 %} 9100 9101 // Or Long Register with Immediate 9102 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9103 match(Set dst (OrL dst src)); 9104 effect(KILL cr); 9105 format %{ "OR $dst.lo,$src.lo\n\t" 9106 "OR $dst.hi,$src.hi" %} 9107 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9108 ins_encode( Long_OpcSErm_Lo( dst, src ), 
Long_OpcSErm_Hi( dst, src ) ); 9109 ins_pipe( ialu_reg_long ); 9110 %} 9111 9112 // Or Long Register with Memory 9113 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9114 match(Set dst (OrL dst (LoadL mem))); 9115 effect(KILL cr); 9116 ins_cost(125); 9117 format %{ "OR $dst.lo,$mem\n\t" 9118 "OR $dst.hi,$mem+4" %} 9119 opcode(0x0B,0x0B); 9120 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9121 ins_pipe( ialu_reg_long_mem ); 9122 %} 9123 9124 // Xor Long Register with Register 9125 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9126 match(Set dst (XorL dst src)); 9127 effect(KILL cr); 9128 format %{ "XOR $dst.lo,$src.lo\n\t" 9129 "XOR $dst.hi,$src.hi" %} 9130 opcode(0x33,0x33); 9131 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9132 ins_pipe( ialu_reg_reg_long ); 9133 %} 9134 9135 // Xor Long Register with Immediate -1 9136 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9137 match(Set dst (XorL dst imm)); 9138 format %{ "NOT $dst.lo\n\t" 9139 "NOT $dst.hi" %} 9140 ins_encode %{ 9141 __ notl($dst$$Register); 9142 __ notl(HIGH_FROM_LOW($dst$$Register)); 9143 %} 9144 ins_pipe( ialu_reg_long ); 9145 %} 9146 9147 // Xor Long Register with Immediate 9148 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9149 match(Set dst (XorL dst src)); 9150 effect(KILL cr); 9151 format %{ "XOR $dst.lo,$src.lo\n\t" 9152 "XOR $dst.hi,$src.hi" %} 9153 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9154 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9155 ins_pipe( ialu_reg_long ); 9156 %} 9157 9158 // Xor Long Register with Memory 9159 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9160 match(Set dst (XorL dst (LoadL mem))); 9161 effect(KILL cr); 9162 ins_cost(125); 9163 format %{ "XOR $dst.lo,$mem\n\t" 9164 "XOR $dst.hi,$mem+4" %} 9165 opcode(0x33,0x33); 9166 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9167 ins_pipe( ialu_reg_long_mem ); 
9168 %} 9169 9170 // Shift Left Long by 1 9171 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9172 predicate(UseNewLongLShift); 9173 match(Set dst (LShiftL dst cnt)); 9174 effect(KILL cr); 9175 ins_cost(100); 9176 format %{ "ADD $dst.lo,$dst.lo\n\t" 9177 "ADC $dst.hi,$dst.hi" %} 9178 ins_encode %{ 9179 __ addl($dst$$Register,$dst$$Register); 9180 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9181 %} 9182 ins_pipe( ialu_reg_long ); 9183 %} 9184 9185 // Shift Left Long by 2 9186 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9187 predicate(UseNewLongLShift); 9188 match(Set dst (LShiftL dst cnt)); 9189 effect(KILL cr); 9190 ins_cost(100); 9191 format %{ "ADD $dst.lo,$dst.lo\n\t" 9192 "ADC $dst.hi,$dst.hi\n\t" 9193 "ADD $dst.lo,$dst.lo\n\t" 9194 "ADC $dst.hi,$dst.hi" %} 9195 ins_encode %{ 9196 __ addl($dst$$Register,$dst$$Register); 9197 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9198 __ addl($dst$$Register,$dst$$Register); 9199 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9200 %} 9201 ins_pipe( ialu_reg_long ); 9202 %} 9203 9204 // Shift Left Long by 3 9205 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9206 predicate(UseNewLongLShift); 9207 match(Set dst (LShiftL dst cnt)); 9208 effect(KILL cr); 9209 ins_cost(100); 9210 format %{ "ADD $dst.lo,$dst.lo\n\t" 9211 "ADC $dst.hi,$dst.hi\n\t" 9212 "ADD $dst.lo,$dst.lo\n\t" 9213 "ADC $dst.hi,$dst.hi\n\t" 9214 "ADD $dst.lo,$dst.lo\n\t" 9215 "ADC $dst.hi,$dst.hi" %} 9216 ins_encode %{ 9217 __ addl($dst$$Register,$dst$$Register); 9218 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9219 __ addl($dst$$Register,$dst$$Register); 9220 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9221 __ addl($dst$$Register,$dst$$Register); 9222 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9223 %} 9224 ins_pipe( ialu_reg_long ); 9225 %} 9226 9227 // Shift Left 
// Shift Left Long by 1-31.
// 32-bit VM: a long lives in a register pair ($dst.lo/$dst.hi).  SHLD feeds
// bits shifted out of the low word into the high word, then SHL shifts the
// low word itself.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63.
// Shifts of >= 32 move the low word into the high word (shifted by cnt-32)
// and zero the low word; no bits cross between words at runtime.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable count (count must be in ECX, per eCXRegI).
// Tests bit 5 of the count to pre-handle the >= 32 case, then finishes with
// the SHLD/SHL pair used by the small-constant form above.
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right (logical) Long by 1-31.
// Mirror of the left-shift form: SHRD feeds high-word bits into the low
// word, then SHR shifts the high word with zero fill.
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right (logical) Long by 32-63: high word moves down, high word zeroed.
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right (logical) Long by variable count in ECX.
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right arithmetic Long by 1-31 (SAR on the high word preserves sign).
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by 32-63.
// "SAR $dst.hi,31" fills the high word with copies of the sign bit.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable count in ECX.
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS.
// FUCOMIP compares and pops in one instruction; the fixup (cmpF_P6_fixup)
// forces CF on when the compare was unordered (NaN), so unordered sorts as
// "less than" for the branch that consumes cr.  EAX is killed because the
// fixup path writes AH and executes SAHF.
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Variant producing eFlagsRegUCF: no NaN fixup is emitted, so the consumer
// must tolerate the raw unordered flag setting.
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
// Pre-P6 path: FCOM + FNSTSW/SAHF to move the FPU status into EFLAGS,
// treating unordered (C2 set, mask 0x400) as "less than".
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1 (CmpD3 result materialized in an int register).
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// double compare and set condition codes in EFLAGS by XMM regs.
// emit_cmpfp_fixup() emits the JNP/PUSHF/AND/POPF sequence shown in the
// format string so that a NaN operand sets CF (unordered reads as "below").
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// As above but producing eFlagsRegUCF: no NaN fixup emitted.
instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// double compare (right operand in memory) with NaN fixup.
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM.  emit_cmpfp3() materializes the three-way
// result in $dst; unordered (JP) falls into the -1 case, matching CmpD3.
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// x87 double subtract: push src, then DSUBp pops ST into dst.
instruct subDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Subtract with an explicit round-to-memory (RoundDouble forces the 64-bit
// store that narrows the 80-bit x87 intermediate).
instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DSUB ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Subtract with the right operand loaded directly from memory.
instruct subDPR_reg_mem(regDPR dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// FABS operates on the top of the FPU stack, hence the regDPR1 operands.
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// FCHS (negate) also operates only on the top of the FPU stack.
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// x87 double add, register form.
instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Add with explicit round-to-memory (see subDPR_reg_round).
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DADD ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Add with the right operand loaded directly from memory.
instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DADDp $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory: load, add, store back to the same address.
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D $dst\n\t"
            "DADD ST,$src\n\t"
            "FST_D $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}

// Add the constant 1.0 via FLD1 — no constant-table load needed.
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// Add a general double constant from the constant table.  The predicate
// excludes 0.0 and 1.0, which have cheaper dedicated forms.
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// Constant add with explicit round-to-memory; same 0.0/1.0 exclusion,
// reaching through RoundDouble to the constant kid.
instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}

// x87 double multiply, register form.
instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (MulD dst src));
  ins_cost(1); // Select this instruction for all FP double multiplies

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Multiply by a constant from the constant table (0.0 and 1.0 excluded).
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}


// Multiply with the right operand loaded directly from memory.
instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}


// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
// Fused multiply-add macro (see MACRO3 comment above): src0*src1 + src2,
// result written back into src2.
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
// Computes src0*src1 - src2 into src2; DSUBRp is the reversed subtract so
// the operand order matches the matched SubD tree.
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


// x87 double divide, register form.
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
// NOTE: the original text carried a second, bare "predicate (UseSSE<=1);"
// clause ahead of match().  An instruct may declare only one predicate; the
// stray weaker clause could select this biased sequence for non-method
// (stub) compiles, unlike its sibling strictfp_mulDPR_reg.  It has been
// removed so the single combined predicate below governs selection.
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(01); // Select this instruction for all FP double divides

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// x87 double remainder via FPREM inside the emitModDPR() helper.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE2 double remainder: spill both operands to the stack, loop on FPREM
// until C2 clears (JP loop), then move the result back to an XMM register.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
            "\tMOVSD [ESP+0],$src1\n"
            "\tFLD_D [ESP+0]\n"
            "\tMOVSD [ESP+0],$src0\n"
            "\tFLD_D [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_D [ESP+0]\n"
            "\tMOVSD $dst,[ESP+0]\n"
            "\tADD ESP,8\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

// x87 arctangent (FPATAN encoding D9 F3).
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

// SSE2 arctangent: operands are shuttled through the stack to the FPU.
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

// x87 square root (FSQRT encoding D9 FA).
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}

//-------------Float Instructions-------------------------------
// Float Math

// Code for float compare:
//     fcompp();
//     fwait(); fnstsw_ax();
//     sahf();
//     movl(dst, unordered_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);
//   exit:

// P6 version of float compare, sets condition codes in EFLAGS.
// Same structure as cmpDPR_cc_P6 above; cmpF_P6_fixup forces CF on for
// unordered (NaN) results so they compare as "less than".
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Variant producing eFlagsRegUCF: no NaN fixup emitted.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
// Pre-P6 path: FCOM + FNSTSW/SAHF, unordered (mask 0x400) treated as LT.
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// EFLAGS by XMM regs
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Same compare for consumers that only need CF-style flags
// (eFlagsRegUCF), so no NaN fixup sequence is emitted.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant for CF-only flag consumers (no NaN fixup).
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Absolute value on the x87 top-of-stack register (regFPR1 == FPR1).
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Negation on the x87 top-of-stack register via FCHS.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder: bounce operands through the stack to the x87
// FPREM loop (no SSE remainder instruction exists).
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
          "\tMOVSS [ESP+0],$src1\n"
          "\tFLD_S [ESP+0]\n"
          "\tMOVSS [ESP+0],$src0\n"
          "\tFLD_S [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS $dst,[ESP+0]\n"
          "\tADD ESP,4\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!

instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.
// If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // 0x80000000 is the CVTTSD2SI "integer indefinite" result, i.e.
    // overflow or NaN: fall into the slow d2i_wrapper call.
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // EDX:EAX == 0x80000000:00000000 is the FISTP "indefinite" value:
    // overflow or NaN, so take the d2l_wrapper slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // 0x80000000 is the CVTTSS2SI "integer indefinite" result
    // (overflow or NaN): reload via x87 and call the slow wrapper.
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // EDX:EAX == 0x80000000:00000000 is the FISTP "indefinite" value
    // (overflow or NaN): take the d2l_wrapper slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// i2d staying entirely in XMM: MOVD then packed convert (UseXmmI2D).
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  // Matches only when the input is (x & 255), i.e. an unsigned byte,
  // so the 24-bit significand can hold it exactly with no rounding.
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F ));
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// i2f staying entirely in XMM: MOVD then packed convert (UseXmmI2F).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy to both halves, arithmetic-shift the
// high half down to all sign bits.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
opcode(0xDF, 0x5); /* DF /5 */ 11149 ins_encode(convert_long_double(src), Pop_Mem_FPR(dst)); 11150 ins_pipe( pipe_slow ); 11151 %} 11152 11153 instruct convL2I_reg( rRegI dst, eRegL src ) %{ 11154 match(Set dst (ConvL2I src)); 11155 effect( DEF dst, USE src ); 11156 format %{ "MOV $dst,$src.lo" %} 11157 ins_encode(enc_CopyL_Lo(dst,src)); 11158 ins_pipe( ialu_reg_reg ); 11159 %} 11160 11161 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{ 11162 match(Set dst (MoveF2I src)); 11163 effect( DEF dst, USE src ); 11164 ins_cost(100); 11165 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %} 11166 ins_encode %{ 11167 __ movl($dst$$Register, Address(rsp, $src$$disp)); 11168 %} 11169 ins_pipe( ialu_reg_mem ); 11170 %} 11171 11172 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{ 11173 predicate(UseSSE==0); 11174 match(Set dst (MoveF2I src)); 11175 effect( DEF dst, USE src ); 11176 11177 ins_cost(125); 11178 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %} 11179 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 11180 ins_pipe( fpu_mem_reg ); 11181 %} 11182 11183 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{ 11184 predicate(UseSSE>=1); 11185 match(Set dst (MoveF2I src)); 11186 effect( DEF dst, USE src ); 11187 11188 ins_cost(95); 11189 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %} 11190 ins_encode %{ 11191 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); 11192 %} 11193 ins_pipe( pipe_slow ); 11194 %} 11195 11196 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{ 11197 predicate(UseSSE>=2); 11198 match(Set dst (MoveF2I src)); 11199 effect( DEF dst, USE src ); 11200 ins_cost(85); 11201 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} 11202 ins_encode %{ 11203 __ movdl($dst$$Register, $src$$XMMRegister); 11204 %} 11205 ins_pipe( pipe_slow ); 11206 %} 11207 11208 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ 11209 match(Set dst (MoveI2F src)); 11210 effect( DEF dst, USE src ); 11211 11212 ins_cost(100); 11213 format %{ "MOV 
$dst,$src\t# MoveI2F_reg_stack" %} 11214 ins_encode %{ 11215 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11216 %} 11217 ins_pipe( ialu_mem_reg ); 11218 %} 11219 11220 11221 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11222 predicate(UseSSE==0); 11223 match(Set dst (MoveI2F src)); 11224 effect(DEF dst, USE src); 11225 11226 ins_cost(125); 11227 format %{ "FLD_S $src\n\t" 11228 "FSTP $dst\t# MoveI2F_stack_reg" %} 11229 opcode(0xD9); /* D9 /0, FLD m32real */ 11230 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11231 Pop_Reg_FPR(dst) ); 11232 ins_pipe( fpu_reg_mem ); 11233 %} 11234 11235 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11236 predicate(UseSSE>=1); 11237 match(Set dst (MoveI2F src)); 11238 effect( DEF dst, USE src ); 11239 11240 ins_cost(95); 11241 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11242 ins_encode %{ 11243 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11244 %} 11245 ins_pipe( pipe_slow ); 11246 %} 11247 11248 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11249 predicate(UseSSE>=2); 11250 match(Set dst (MoveI2F src)); 11251 effect( DEF dst, USE src ); 11252 11253 ins_cost(85); 11254 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11255 ins_encode %{ 11256 __ movdl($dst$$XMMRegister, $src$$Register); 11257 %} 11258 ins_pipe( pipe_slow ); 11259 %} 11260 11261 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11262 match(Set dst (MoveD2L src)); 11263 effect(DEF dst, USE src); 11264 11265 ins_cost(250); 11266 format %{ "MOV $dst.lo,$src\n\t" 11267 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11268 opcode(0x8B, 0x8B); 11269 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); 11270 ins_pipe( ialu_mem_long_reg ); 11271 %} 11272 11273 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11274 predicate(UseSSE<=1); 11275 match(Set dst (MoveD2L src)); 11276 effect(DEF dst, USE src); 11277 11278 ins_cost(125); 11279 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" 
%} 11280 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11281 ins_pipe( fpu_mem_reg ); 11282 %} 11283 11284 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11285 predicate(UseSSE>=2); 11286 match(Set dst (MoveD2L src)); 11287 effect(DEF dst, USE src); 11288 ins_cost(95); 11289 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11290 ins_encode %{ 11291 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11292 %} 11293 ins_pipe( pipe_slow ); 11294 %} 11295 11296 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11297 predicate(UseSSE>=2); 11298 match(Set dst (MoveD2L src)); 11299 effect(DEF dst, USE src, TEMP tmp); 11300 ins_cost(85); 11301 format %{ "MOVD $dst.lo,$src\n\t" 11302 "PSHUFLW $tmp,$src,0x4E\n\t" 11303 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11304 ins_encode %{ 11305 __ movdl($dst$$Register, $src$$XMMRegister); 11306 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11307 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11308 %} 11309 ins_pipe( pipe_slow ); 11310 %} 11311 11312 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11313 match(Set dst (MoveL2D src)); 11314 effect(DEF dst, USE src); 11315 11316 ins_cost(200); 11317 format %{ "MOV $dst,$src.lo\n\t" 11318 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11319 opcode(0x89, 0x89); 11320 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 11321 ins_pipe( ialu_mem_long_reg ); 11322 %} 11323 11324 11325 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11326 predicate(UseSSE<=1); 11327 match(Set dst (MoveL2D src)); 11328 effect(DEF dst, USE src); 11329 ins_cost(125); 11330 11331 format %{ "FLD_D $src\n\t" 11332 "FSTP $dst\t# MoveL2D_stack_reg" %} 11333 opcode(0xDD); /* DD /0, FLD m64real */ 11334 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11335 Pop_Reg_DPR(dst) ); 11336 ins_pipe( fpu_reg_mem ); 11337 %} 11338 11339 11340 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11341 predicate(UseSSE>=2 && 
UseXmmLoadAndClearUpper); 11342 match(Set dst (MoveL2D src)); 11343 effect(DEF dst, USE src); 11344 11345 ins_cost(95); 11346 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11347 ins_encode %{ 11348 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11349 %} 11350 ins_pipe( pipe_slow ); 11351 %} 11352 11353 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11354 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11355 match(Set dst (MoveL2D src)); 11356 effect(DEF dst, USE src); 11357 11358 ins_cost(95); 11359 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11360 ins_encode %{ 11361 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11362 %} 11363 ins_pipe( pipe_slow ); 11364 %} 11365 11366 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11367 predicate(UseSSE>=2); 11368 match(Set dst (MoveL2D src)); 11369 effect(TEMP dst, USE src, TEMP tmp); 11370 ins_cost(85); 11371 format %{ "MOVD $dst,$src.lo\n\t" 11372 "MOVD $tmp,$src.hi\n\t" 11373 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11374 ins_encode %{ 11375 __ movdl($dst$$XMMRegister, $src$$Register); 11376 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11377 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11378 %} 11379 ins_pipe( pipe_slow ); 11380 %} 11381 11382 //----------------------------- CompressBits/ExpandBits ------------------------ 11383 11384 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ 11385 predicate(n->bottom_type()->isa_long()); 11386 match(Set dst (CompressBits src mask)); 11387 effect(TEMP rtmp, TEMP xtmp, KILL cr); 11388 format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %} 11389 ins_encode %{ 11390 Label exit, partail_result; 11391 // Parallely extract both upper and lower 32 bits of source into destination register pair. 
11392 // Merge the results of upper and lower destination registers such that upper destination 11393 // results are contiguously laid out after the lower destination result. 11394 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 11395 __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); 11396 __ popcntl($rtmp$$Register, $mask$$Register); 11397 // Skip merging if bit count of lower mask register is equal to 32 (register size). 11398 __ cmpl($rtmp$$Register, 32); 11399 __ jccb(Assembler::equal, exit); 11400 // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. 11401 __ movdl($xtmp$$XMMRegister, $rtmp$$Register); 11402 // Shift left the contents of upper destination register by true bit count of lower mask register 11403 // and merge with lower destination register. 11404 __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11405 __ orl($dst$$Register, $rtmp$$Register); 11406 __ movdl($rtmp$$Register, $xtmp$$XMMRegister); 11407 // Zero out upper destination register if true bit count of lower 32 bit mask is zero 11408 // since contents of upper destination have already been copied to lower destination 11409 // register. 11410 __ cmpl($rtmp$$Register, 0); 11411 __ jccb(Assembler::greater, partail_result); 11412 __ movl(HIGH_FROM_LOW($dst$$Register), 0); 11413 __ jmp(exit); 11414 __ bind(partail_result); 11415 // Perform right shift over upper destination register to move out bits already copied 11416 // to lower destination register. 
11417 __ subl($rtmp$$Register, 32); 11418 __ negl($rtmp$$Register); 11419 __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11420 __ bind(exit); 11421 %} 11422 ins_pipe( pipe_slow ); 11423 %} 11424 11425 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ 11426 predicate(n->bottom_type()->isa_long()); 11427 match(Set dst (ExpandBits src mask)); 11428 effect(TEMP rtmp, TEMP xtmp, KILL cr); 11429 format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %} 11430 ins_encode %{ 11431 // Extraction operation sequentially reads the bits from source register starting from LSB 11432 // and lays them out into destination register at bit locations corresponding to true bits 11433 // in mask register. Thus number of source bits read are equal to combined true bit count 11434 // of mask register pair. 11435 Label exit, mask_clipping; 11436 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 11437 __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); 11438 __ popcntl($rtmp$$Register, $mask$$Register); 11439 // If true bit count of lower mask register is 32 then none of bit of lower source register 11440 // will feed to upper destination register. 11441 __ cmpl($rtmp$$Register, 32); 11442 __ jccb(Assembler::equal, exit); 11443 // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. 11444 __ movdl($xtmp$$XMMRegister, $rtmp$$Register); 11445 // Shift right the contents of lower source register to remove already consumed bits. 11446 __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register); 11447 // Extract the bits from lower source register starting from LSB under the influence 11448 // of upper mask register. 
11449 __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register)); 11450 __ movdl($rtmp$$Register, $xtmp$$XMMRegister); 11451 __ subl($rtmp$$Register, 32); 11452 __ negl($rtmp$$Register); 11453 __ movdl($xtmp$$XMMRegister, $mask$$Register); 11454 __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register)); 11455 // Clear the set bits in upper mask register which have been used to extract the contents 11456 // from lower source register. 11457 __ bind(mask_clipping); 11458 __ blsrl($mask$$Register, $mask$$Register); 11459 __ decrementl($rtmp$$Register, 1); 11460 __ jccb(Assembler::greater, mask_clipping); 11461 // Starting from LSB extract the bits from upper source register under the influence of 11462 // remaining set bits in upper mask register. 11463 __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register); 11464 // Merge the partial results extracted from lower and upper source register bits. 11465 __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11466 __ movdl($mask$$Register, $xtmp$$XMMRegister); 11467 __ bind(exit); 11468 %} 11469 ins_pipe( pipe_slow ); 11470 %} 11471 11472 // ======================================================================= 11473 // fast clearing of an array 11474 // Small ClearArray non-AVX512. 
// Clear a small array (size not known to be large) with plain SSE/AVX2 code.
// cnt is in doublewords; all of cnt/base/zero/flags are clobbered.
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large = false, no AVX512 mask register.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Small ClearArray AVX512 non-constant length.
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large = false, AVX512 path passes the mask register temp.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Large ClearArray non-AVX512.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large = true, no AVX512 mask register.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Large ClearArray AVX512.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large = true, AVX512 path passes the mask register temp.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Small ClearArray AVX512 constant length.
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
            ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    // Length is a compile-time constant; uses the immediate-count clear_mem overload.
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// String compare intrinsic, byte[] vs byte[] (Latin1/Latin1), non-AVX512 path.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, byte[] vs byte[] (Latin1/Latin1), AVX512-VLBW path.
instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, char[] vs char[] (UTF-16/UTF-16), non-AVX512 path.
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, char[] vs char[] (UTF-16/UTF-16), AVX512-VLBW path.
instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, Latin1 vs UTF-16, non-AVX512 path.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, Latin1 vs UTF-16, AVX512-VLBW path.
instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, UTF-16 vs Latin1, non-AVX512 path. Note the operands are
// swapped in the call (str2/str1, cnt2/cnt1) for the UL encoding.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, UTF-16 vs Latin1, AVX512-VLBW path (operands swapped as above).
instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // first 'false' = not an array-equals node; trailing 'false' selects the
    // byte element size (see arrays_equals is_char parameter).
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe( pipe_slow );
%}

// fast string equals, AVX512-VLBW path.
instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}


// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
11921 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11922 eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11923 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11924 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11925 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11926 11927 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %} 11928 ins_encode %{ 11929 int icnt2 = (int)$int_cnt2$$constant; 11930 if (icnt2 >= 8) { 11931 // IndexOf for constant substrings with size >= 8 elements 11932 // which don't need to be loaded through stack. 11933 __ string_indexofC8($str1$$Register, $str2$$Register, 11934 $cnt1$$Register, $cnt2$$Register, 11935 icnt2, $result$$Register, 11936 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11937 } else { 11938 // Small strings are loaded through stack if they cross page boundary. 
11939 __ string_indexof($str1$$Register, $str2$$Register, 11940 $cnt1$$Register, $cnt2$$Register, 11941 icnt2, $result$$Register, 11942 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11943 } 11944 %} 11945 ins_pipe( pipe_slow ); 11946 %} 11947 11948 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11949 eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{ 11950 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11951 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11952 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11953 11954 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11955 ins_encode %{ 11956 __ string_indexof($str1$$Register, $str2$$Register, 11957 $cnt1$$Register, $cnt2$$Register, 11958 (-1), $result$$Register, 11959 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11960 %} 11961 ins_pipe( pipe_slow ); 11962 %} 11963 11964 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11965 eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{ 11966 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11967 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11968 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11969 11970 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11971 ins_encode %{ 11972 __ string_indexof($str1$$Register, $str2$$Register, 11973 $cnt1$$Register, $cnt2$$Register, 11974 (-1), $result$$Register, 11975 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11976 %} 11977 ins_pipe( pipe_slow ); 11978 %} 11979 11980 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11981 eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{ 11982 
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11983 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11984 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11985 11986 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11987 ins_encode %{ 11988 __ string_indexof($str1$$Register, $str2$$Register, 11989 $cnt1$$Register, $cnt2$$Register, 11990 (-1), $result$$Register, 11991 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11992 %} 11993 ins_pipe( pipe_slow ); 11994 %} 11995 11996 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, 11997 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ 11998 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); 11999 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); 12000 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); 12001 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} 12002 ins_encode %{ 12003 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, 12004 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); 12005 %} 12006 ins_pipe( pipe_slow ); 12007 %} 12008 12009 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, 12010 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ 12011 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); 12012 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); 12013 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); 12014 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} 12015 ins_encode %{ 12016 __ 
stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, 12017 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); 12018 %} 12019 ins_pipe( pipe_slow ); 12020 %} 12021 12022 12023 // fast array equals 12024 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12025 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 12026 %{ 12027 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 12028 match(Set result (AryEq ary1 ary2)); 12029 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12030 //ins_cost(300); 12031 12032 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 12033 ins_encode %{ 12034 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 12035 $tmp3$$Register, $result$$Register, $tmp4$$Register, 12036 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg); 12037 %} 12038 ins_pipe( pipe_slow ); 12039 %} 12040 12041 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12042 regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 12043 %{ 12044 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 12045 match(Set result (AryEq ary1 ary2)); 12046 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12047 //ins_cost(300); 12048 12049 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 12050 ins_encode %{ 12051 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 12052 $tmp3$$Register, $result$$Register, $tmp4$$Register, 12053 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister); 12054 %} 12055 ins_pipe( pipe_slow ); 12056 %} 12057 12058 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12059 regD tmp1, regD tmp2, eCXRegI 
tmp3, eBXRegI tmp4, eFlagsReg cr) 12060 %{ 12061 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 12062 match(Set result (AryEq ary1 ary2)); 12063 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12064 //ins_cost(300); 12065 12066 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 12067 ins_encode %{ 12068 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 12069 $tmp3$$Register, $result$$Register, $tmp4$$Register, 12070 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg); 12071 %} 12072 ins_pipe( pipe_slow ); 12073 %} 12074 12075 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12076 regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 12077 %{ 12078 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 12079 match(Set result (AryEq ary1 ary2)); 12080 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12081 //ins_cost(300); 12082 12083 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 12084 ins_encode %{ 12085 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 12086 $tmp3$$Register, $result$$Register, $tmp4$$Register, 12087 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister); 12088 %} 12089 ins_pipe( pipe_slow ); 12090 %} 12091 12092 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result, 12093 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) 12094 %{ 12095 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); 12096 match(Set result (CountPositives ary1 len)); 12097 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); 12098 12099 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} 12100 ins_encode %{ 
12101 __ count_positives($ary1$$Register, $len$$Register, 12102 $result$$Register, $tmp3$$Register, 12103 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg); 12104 %} 12105 ins_pipe( pipe_slow ); 12106 %} 12107 12108 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result, 12109 regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr) 12110 %{ 12111 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); 12112 match(Set result (CountPositives ary1 len)); 12113 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); 12114 12115 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} 12116 ins_encode %{ 12117 __ count_positives($ary1$$Register, $len$$Register, 12118 $result$$Register, $tmp3$$Register, 12119 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 12120 %} 12121 ins_pipe( pipe_slow ); 12122 %} 12123 12124 12125 // fast char[] to byte[] compression 12126 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, 12127 regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12128 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); 12129 match(Set result (StrCompressedCopy src (Binary dst len))); 12130 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12131 12132 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} 12133 ins_encode %{ 12134 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, 12135 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12136 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, 12137 knoreg, knoreg); 12138 %} 12139 ins_pipe( pipe_slow ); 12140 %} 12141 12142 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, 12143 regD tmp3, regD tmp4, kReg ktmp1, kReg 
ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12144 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); 12145 match(Set result (StrCompressedCopy src (Binary dst len))); 12146 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12147 12148 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} 12149 ins_encode %{ 12150 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, 12151 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12152 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, 12153 $ktmp1$$KRegister, $ktmp2$$KRegister); 12154 %} 12155 ins_pipe( pipe_slow ); 12156 %} 12157 12158 // fast byte[] to char[] inflation 12159 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 12160 regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{ 12161 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); 12162 match(Set dummy (StrInflatedCopy src (Binary dst len))); 12163 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 12164 12165 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 12166 ins_encode %{ 12167 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, 12168 $tmp1$$XMMRegister, $tmp2$$Register, knoreg); 12169 %} 12170 ins_pipe( pipe_slow ); 12171 %} 12172 12173 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 12174 regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{ 12175 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); 12176 match(Set dummy (StrInflatedCopy src (Binary dst len))); 12177 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 12178 12179 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 12180 ins_encode %{ 12181 __ byte_array_inflate($src$$Register, $dst$$Register, 
$len$$Register, 12182 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister); 12183 %} 12184 ins_pipe( pipe_slow ); 12185 %} 12186 12187 // encode char[] to byte[] in ISO_8859_1 12188 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, 12189 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 12190 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12191 predicate(!((EncodeISOArrayNode*)n)->is_ascii()); 12192 match(Set result (EncodeISOArray src (Binary dst len))); 12193 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12194 12195 format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 12196 ins_encode %{ 12197 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 12198 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12199 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false); 12200 %} 12201 ins_pipe( pipe_slow ); 12202 %} 12203 12204 // encode char[] to byte[] in ASCII 12205 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len, 12206 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 12207 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12208 predicate(((EncodeISOArrayNode*)n)->is_ascii()); 12209 match(Set result (EncodeISOArray src (Binary dst len))); 12210 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12211 12212 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 12213 ins_encode %{ 12214 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 12215 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12216 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true); 12217 %} 12218 ins_pipe( pipe_slow ); 12219 %} 12220 12221 //----------Control Flow Instructions------------------------------------------ 12222 // Signed compare Instructions 12223 
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ 12224 match(Set cr (CmpI op1 op2)); 12225 effect( DEF cr, USE op1, USE op2 ); 12226 format %{ "CMP $op1,$op2" %} 12227 opcode(0x3B); /* Opcode 3B /r */ 12228 ins_encode( OpcP, RegReg( op1, op2) ); 12229 ins_pipe( ialu_cr_reg_reg ); 12230 %} 12231 12232 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ 12233 match(Set cr (CmpI op1 op2)); 12234 effect( DEF cr, USE op1 ); 12235 format %{ "CMP $op1,$op2" %} 12236 opcode(0x81,0x07); /* Opcode 81 /7 */ 12237 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ 12238 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 12239 ins_pipe( ialu_cr_reg_imm ); 12240 %} 12241 12242 // Cisc-spilled version of cmpI_eReg 12243 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ 12244 match(Set cr (CmpI op1 (LoadI op2))); 12245 12246 format %{ "CMP $op1,$op2" %} 12247 ins_cost(500); 12248 opcode(0x3B); /* Opcode 3B /r */ 12249 ins_encode( OpcP, RegMem( op1, op2) ); 12250 ins_pipe( ialu_cr_reg_mem ); 12251 %} 12252 12253 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{ 12254 match(Set cr (CmpI src zero)); 12255 effect( DEF cr, USE src ); 12256 12257 format %{ "TEST $src,$src" %} 12258 opcode(0x85); 12259 ins_encode( OpcP, RegReg( src, src ) ); 12260 ins_pipe( ialu_cr_reg_imm ); 12261 %} 12262 12263 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{ 12264 match(Set cr (CmpI (AndI src con) zero)); 12265 12266 format %{ "TEST $src,$con" %} 12267 opcode(0xF7,0x00); 12268 ins_encode( OpcP, RegOpc(src), Con32(con) ); 12269 ins_pipe( ialu_cr_reg_imm ); 12270 %} 12271 12272 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{ 12273 match(Set cr (CmpI (AndI src mem) zero)); 12274 12275 format %{ "TEST $src,$mem" %} 12276 opcode(0x85); 12277 ins_encode( OpcP, RegMem( src, mem ) ); 12278 ins_pipe( ialu_cr_reg_mem ); 12279 %} 12280 12281 // Unsigned compare Instructions; really, same as signed except 
they 12282 // produce an eFlagsRegU instead of eFlagsReg. 12283 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{ 12284 match(Set cr (CmpU op1 op2)); 12285 12286 format %{ "CMPu $op1,$op2" %} 12287 opcode(0x3B); /* Opcode 3B /r */ 12288 ins_encode( OpcP, RegReg( op1, op2) ); 12289 ins_pipe( ialu_cr_reg_reg ); 12290 %} 12291 12292 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{ 12293 match(Set cr (CmpU op1 op2)); 12294 12295 format %{ "CMPu $op1,$op2" %} 12296 opcode(0x81,0x07); /* Opcode 81 /7 */ 12297 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 12298 ins_pipe( ialu_cr_reg_imm ); 12299 %} 12300 12301 // // Cisc-spilled version of cmpU_eReg 12302 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{ 12303 match(Set cr (CmpU op1 (LoadI op2))); 12304 12305 format %{ "CMPu $op1,$op2" %} 12306 ins_cost(500); 12307 opcode(0x3B); /* Opcode 3B /r */ 12308 ins_encode( OpcP, RegMem( op1, op2) ); 12309 ins_pipe( ialu_cr_reg_mem ); 12310 %} 12311 12312 // // Cisc-spilled version of cmpU_eReg 12313 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{ 12314 // match(Set cr (CmpU (LoadI op1) op2)); 12315 // 12316 // format %{ "CMPu $op1,$op2" %} 12317 // ins_cost(500); 12318 // opcode(0x39); /* Opcode 39 /r */ 12319 // ins_encode( OpcP, RegMem( op1, op2) ); 12320 //%} 12321 12322 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{ 12323 match(Set cr (CmpU src zero)); 12324 12325 format %{ "TESTu $src,$src" %} 12326 opcode(0x85); 12327 ins_encode( OpcP, RegReg( src, src ) ); 12328 ins_pipe( ialu_cr_reg_imm ); 12329 %} 12330 12331 // Unsigned pointer compare Instructions 12332 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{ 12333 match(Set cr (CmpP op1 op2)); 12334 12335 format %{ "CMPu $op1,$op2" %} 12336 opcode(0x3B); /* Opcode 3B /r */ 12337 ins_encode( OpcP, RegReg( op1, op2) ); 12338 ins_pipe( ialu_cr_reg_reg ); 12339 %} 12340 12341 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{ 
12342 match(Set cr (CmpP op1 op2)); 12343 12344 format %{ "CMPu $op1,$op2" %} 12345 opcode(0x81,0x07); /* Opcode 81 /7 */ 12346 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 12347 ins_pipe( ialu_cr_reg_imm ); 12348 %} 12349 12350 // // Cisc-spilled version of cmpP_eReg 12351 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{ 12352 match(Set cr (CmpP op1 (LoadP op2))); 12353 12354 format %{ "CMPu $op1,$op2" %} 12355 ins_cost(500); 12356 opcode(0x3B); /* Opcode 3B /r */ 12357 ins_encode( OpcP, RegMem( op1, op2) ); 12358 ins_pipe( ialu_cr_reg_mem ); 12359 %} 12360 12361 // // Cisc-spilled version of cmpP_eReg 12362 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{ 12363 // match(Set cr (CmpP (LoadP op1) op2)); 12364 // 12365 // format %{ "CMPu $op1,$op2" %} 12366 // ins_cost(500); 12367 // opcode(0x39); /* Opcode 39 /r */ 12368 // ins_encode( OpcP, RegMem( op1, op2) ); 12369 //%} 12370 12371 // Compare raw pointer (used in out-of-heap check). 12372 // Only works because non-oop pointers must be raw pointers 12373 // and raw pointers have no anti-dependencies. 12374 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{ 12375 predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none ); 12376 match(Set cr (CmpP op1 (LoadP op2))); 12377 12378 format %{ "CMPu $op1,$op2" %} 12379 opcode(0x3B); /* Opcode 3B /r */ 12380 ins_encode( OpcP, RegMem( op1, op2) ); 12381 ins_pipe( ialu_cr_reg_mem ); 12382 %} 12383 12384 // 12385 // This will generate a signed flags result. This should be ok 12386 // since any compare to a zero should be eq/neq. 12387 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{ 12388 match(Set cr (CmpP src zero)); 12389 12390 format %{ "TEST $src,$src" %} 12391 opcode(0x85); 12392 ins_encode( OpcP, RegReg( src, src ) ); 12393 ins_pipe( ialu_cr_reg_imm ); 12394 %} 12395 12396 // Cisc-spilled version of testP_reg 12397 // This will generate a signed flags result. 
This should be ok 12398 // since any compare to a zero should be eq/neq. 12399 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{ 12400 match(Set cr (CmpP (LoadP op) zero)); 12401 12402 format %{ "TEST $op,0xFFFFFFFF" %} 12403 ins_cost(500); 12404 opcode(0xF7); /* Opcode F7 /0 */ 12405 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) ); 12406 ins_pipe( ialu_cr_reg_imm ); 12407 %} 12408 12409 // Yanked all unsigned pointer compare operations. 12410 // Pointer compares are done with CmpP which is already unsigned. 12411 12412 //----------Max and Min-------------------------------------------------------- 12413 // Min Instructions 12414 //// 12415 // *** Min and Max using the conditional move are slower than the 12416 // *** branch version on a Pentium III. 12417 // // Conditional move for min 12418 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 12419 // effect( USE_DEF op2, USE op1, USE cr ); 12420 // format %{ "CMOVlt $op2,$op1\t! min" %} 12421 // opcode(0x4C,0x0F); 12422 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 12423 // ins_pipe( pipe_cmov_reg ); 12424 //%} 12425 // 12426 //// Min Register with Register (P6 version) 12427 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ 12428 // predicate(VM_Version::supports_cmov() ); 12429 // match(Set op2 (MinI op1 op2)); 12430 // ins_cost(200); 12431 // expand %{ 12432 // eFlagsReg cr; 12433 // compI_eReg(cr,op1,op2); 12434 // cmovI_reg_lt(op2,op1,cr); 12435 // %} 12436 //%} 12437 12438 // Min Register with Register (generic version) 12439 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 12440 match(Set dst (MinI dst src)); 12441 effect(KILL flags); 12442 ins_cost(300); 12443 12444 format %{ "MIN $dst,$src" %} 12445 opcode(0xCC); 12446 ins_encode( min_enc(dst,src) ); 12447 ins_pipe( pipe_slow ); 12448 %} 12449 12450 // Max Register with Register 12451 // *** Min and Max using the conditional move are slower than the 12452 // *** branch version on a Pentium III. 
12453 // // Conditional move for max 12454 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 12455 // effect( USE_DEF op2, USE op1, USE cr ); 12456 // format %{ "CMOVgt $op2,$op1\t! max" %} 12457 // opcode(0x4F,0x0F); 12458 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 12459 // ins_pipe( pipe_cmov_reg ); 12460 //%} 12461 // 12462 // // Max Register with Register (P6 version) 12463 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ 12464 // predicate(VM_Version::supports_cmov() ); 12465 // match(Set op2 (MaxI op1 op2)); 12466 // ins_cost(200); 12467 // expand %{ 12468 // eFlagsReg cr; 12469 // compI_eReg(cr,op1,op2); 12470 // cmovI_reg_gt(op2,op1,cr); 12471 // %} 12472 //%} 12473 12474 // Max Register with Register (generic version) 12475 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 12476 match(Set dst (MaxI dst src)); 12477 effect(KILL flags); 12478 ins_cost(300); 12479 12480 format %{ "MAX $dst,$src" %} 12481 opcode(0xCC); 12482 ins_encode( max_enc(dst,src) ); 12483 ins_pipe( pipe_slow ); 12484 %} 12485 12486 // ============================================================================ 12487 // Counted Loop limit node which represents exact final iterator value. 12488 // Note: the resulting value should fit into integer range since 12489 // counted loops have limit check on overflow. 12490 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{ 12491 match(Set limit (LoopLimit (Binary init limit) stride)); 12492 effect(TEMP limit_hi, TEMP tmp, KILL flags); 12493 ins_cost(300); 12494 12495 format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %} 12496 ins_encode %{ 12497 int strd = (int)$stride$$constant; 12498 assert(strd != 1 && strd != -1, "sanity"); 12499 int m1 = (strd > 0) ? 
1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index), noreg);
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);   // long form: 1-byte opcode + 4-byte displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);   // long form Jcc: 2-byte opcode + 4-byte displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Unordered-or-equal / not-equal compare result: the parity flag (set on
// an unordered FP compare) has to be folded into the branch, hence the
// extra JP emitted by this variant.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Same slow path as above, but matched directly under a CmpP-with-zero so
// only the flags result is needed; the EDI result register is clobbered.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);   // short form: 1-byte opcode + 1-byte displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);

  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short-branch flavor of jmpConUCF2: two short jcc's, so size(4).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // Signed compare of the high halves decides unless they are equal...
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // ...then an unsigned compare of the low halves breaks the tie.
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  // For LT/GE against zero only the sign of the high word matters.
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned (_U) flavors just delegate to the signed-flag instruct via
// expand; the flags operand class carries the unsigned interpretation.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LTGE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  // Equality with zero: OR the halves together and test for zero.
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned (_U) flavors delegate to the signed-flag instructs via expand.
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_EQNE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_EQNE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_EQNE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned flavor: same CMOV sequence as cmovLL_reg_LEGT, reached via the
// unsigned condition/flags operands and delegated through expand.
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Int CMOV with a memory source.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned flavors, delegating to the signed-operand instructs above.
instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LEGT(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 stack register form, UseSSE<=1).
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM register form, UseSSE>=2).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, UseSSE==0).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM form, UseSSE>=1).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  // Keep the call aligned so the return address lands on a predictable
  // boundary (see ret_addr_offset()/compute_padding() note above).
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
13528 instruct CallDynamicJavaDirect(method meth) %{ 13529 match(CallDynamicJava); 13530 effect(USE meth); 13531 13532 ins_cost(300); 13533 format %{ "MOV EAX,(oop)-1\n\t" 13534 "CALL,dynamic" %} 13535 opcode(0xE8); /* E8 cd */ 13536 ins_encode( pre_call_resets, 13537 Java_Dynamic_Call( meth ), 13538 call_epilog, 13539 post_call_FPU ); 13540 ins_pipe( pipe_slow ); 13541 ins_alignment(4); 13542 %} 13543 13544 // Call Runtime Instruction 13545 instruct CallRuntimeDirect(method meth) %{ 13546 match(CallRuntime ); 13547 effect(USE meth); 13548 13549 ins_cost(300); 13550 format %{ "CALL,runtime " %} 13551 opcode(0xE8); /* E8 cd */ 13552 // Use FFREEs to clear entries in float stack 13553 ins_encode( pre_call_resets, 13554 FFree_Float_Stack_All, 13555 Java_To_Runtime( meth ), 13556 post_call_FPU ); 13557 ins_pipe( pipe_slow ); 13558 %} 13559 13560 // Call runtime without safepoint 13561 instruct CallLeafDirect(method meth) %{ 13562 match(CallLeaf); 13563 effect(USE meth); 13564 13565 ins_cost(300); 13566 format %{ "CALL_LEAF,runtime " %} 13567 opcode(0xE8); /* E8 cd */ 13568 ins_encode( pre_call_resets, 13569 FFree_Float_Stack_All, 13570 Java_To_Runtime( meth ), 13571 Verify_FPU_For_Leaf, post_call_FPU ); 13572 ins_pipe( pipe_slow ); 13573 %} 13574 13575 instruct CallLeafNoFPDirect(method meth) %{ 13576 match(CallLeafNoFP); 13577 effect(USE meth); 13578 13579 ins_cost(300); 13580 format %{ "CALL_LEAF_NOFP,runtime " %} 13581 opcode(0xE8); /* E8 cd */ 13582 ins_encode(pre_call_resets, Java_To_Runtime(meth)); 13583 ins_pipe( pipe_slow ); 13584 %} 13585 13586 13587 // Return Instruction 13588 // Remove the return address & jump to it. 13589 instruct Ret() %{ 13590 match(Return); 13591 format %{ "RET" %} 13592 opcode(0xC3); 13593 ins_encode(OpcP); 13594 ins_pipe( pipe_jmp ); 13595 %} 13596 13597 // Tail Call; Jump from runtime stub to Java code. 13598 // Also known as an 'interprocedural jump'. 13599 // Target of jump will eventually return to caller. 
13600 // TailJump below removes the return address. 13601 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{ 13602 match(TailCall jump_target method_ptr); 13603 ins_cost(300); 13604 format %{ "JMP $jump_target \t# EBX holds method" %} 13605 opcode(0xFF, 0x4); /* Opcode FF /4 */ 13606 ins_encode( OpcP, RegOpc(jump_target) ); 13607 ins_pipe( pipe_jmp ); 13608 %} 13609 13610 13611 // Tail Jump; remove the return address; jump to target. 13612 // TailCall above leaves the return address around. 13613 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{ 13614 match( TailJump jump_target ex_oop ); 13615 ins_cost(300); 13616 format %{ "POP EDX\t# pop return address into dummy\n\t" 13617 "JMP $jump_target " %} 13618 opcode(0xFF, 0x4); /* Opcode FF /4 */ 13619 ins_encode( enc_pop_rdx, 13620 OpcP, RegOpc(jump_target) ); 13621 ins_pipe( pipe_jmp ); 13622 %} 13623 13624 // Create exception oop: created by stack-crawling runtime code. 13625 // Created exception is now available to this handler, and is setup 13626 // just prior to jumping to this handler. No code emitted. 13627 instruct CreateException( eAXRegP ex_oop ) 13628 %{ 13629 match(Set ex_oop (CreateEx)); 13630 13631 size(0); 13632 // use the following format syntax 13633 format %{ "# exception oop is in EAX; no code emitted" %} 13634 ins_encode(); 13635 ins_pipe( empty ); 13636 %} 13637 13638 13639 // Rethrow exception: 13640 // The exception oop will come in the first argument position. 13641 // Then JUMP (not call) to the rethrow stub code. 
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP    rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// RTM (hardware transactional memory) fast lock.  Profiling data for the
// RTM counters is fed from the current method's MethodData.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    // 32-bit x86 has no thread register; fetch the JavaThread* first.
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
                 _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM, non-lightweight (stack-locking) fast lock.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
  predicate(LockingMode != LM_LIGHTWEIGHT && !Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, $thread$$Register, nullptr, nullptr, nullptr, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp, eRegP scr) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Lightweight-locking (LM_LIGHTWEIGHT) variants; EAX is required by the
// CAS sequence inside the macro-assembler helpers.
instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP scr, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object eax_reg));
  effect(TEMP tmp, TEMP scr, USE_KILL eax_reg, TEMP thread);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $scr$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast a scalar into an AVX-512 opmask register (MaskAll).
// NOTE(review): the format text says "LE32" while the instruct name says
// "LT32"; the predicate is <= 32, so the format text matches the predicate
// — confirm which naming was intended.
instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// MaskAll for vectors longer than 32 lanes; needs a temp opmask register.
instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Safepoint Instruction
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();
    // 0x85 is the TEST r/m32,r32 opcode: verify the expected 2-byte form
    // was emitted, since the declared size(2) depends on it.
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV    $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...]
//  );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Live rule: a load that immediately reloads the value just stored to the
// same memory slot is collapsed back into the store alone.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.