1 // 2 // Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // architecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 
//
// SOC = Save-On-Call: The register allocator assumes that these registers
// can be used without saving upon entry to the method,
// but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
// must be saved before using them upon entry to the
// method, but they do not need to be saved at call
// sites.
//
// AS = Always-Save: The register allocator assumes that these registers
// must be saved before using them upon entry to the
// method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.
// (Each reg_def is: name(save type, C save type, ideal type, encoding, VMReg).)

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
// Each x87 register is modeled as two 32-bit halves (L and H) so that a
// double value occupies a register pair.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
reg_class ebpd_reg( EBP,EDI );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ masm->

// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x)     (as_Register((x)->encoding()+2))
#define HIGH_FROM_LOW_ENC(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// No extra register masks need to be computed at startup on x86_32.
void reg_mask_init() {}

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Copies the 64-bit pattern (lo, hi) into a 16-byte-aligned slot inside the
// buffer at 'adr' and returns the aligned address.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool entry is a 16-byte-aligned constant inside fp_signmask_pool.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call instruction:
// 6 for an optional FLDCW (24-bit FP mode) plus 3 for an optional vzeroupper.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size of the FFree_Float_Stack_All sequence; set when it is first emitted.
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1;                      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5;                      // skip MOV instruction
  current_offset += 1;                      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M (or SIB) byte built from the three 2/3/3-bit fields.
void emit_rm(C2_MacroAssembler *masm, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  __ emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with a condition code OR-ed into its low bits.
void emit_cc(C2_MacroAssembler *masm, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  __ emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(C2_MacroAssembler *masm, int code) {
  __ emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(C2_MacroAssembler *masm, int code, relocInfo::relocType reloc, int offset = 0) {
  __ relocate(__ inst_mark() + offset, reloc);
  emit_opcode(masm, code);
}

// EMIT_D8()
void emit_d8(C2_MacroAssembler *masm, int d8) {
  __ emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(C2_MacroAssembler *masm, int d16) {
  __ emit_int16(d16);
}

// EMIT_D32()
void emit_d32(C2_MacroAssembler *masm, int d32) {
  __ emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(C2_MacroAssembler *masm, int d32, relocInfo::relocType reloc,
        int format) {
  __ relocate(__ inst_mark(), reloc, format);
  __ emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(C2_MacroAssembler *masm, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Embedded oop constants must be valid oops (or the non-oop sentinel).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  __ relocate(__ inst_mark(), rspec, format);
  __ emit_int32(d32);
}

// Access stack slot for load or store
// Emits the ModR/M + SIB + displacement bytes for an [ESP+disp] operand,
// choosing the short 8-bit displacement form when disp fits in a byte.
void store_to_stackslot(C2_MacroAssembler *masm, int opcode, int rm_field, int disp) {
  emit_opcode( masm, opcode );                 // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( masm, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (masm, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( masm, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(masm, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModR/M, optional SIB, and displacement bytes addressing memory as
// [base + index*scale + displace]; picks the shortest legal x86 encoding and
// attaches relocation info to the displacement when disp_reloc is set.
void encode_RegMem( C2_MacroAssembler *masm, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(masm, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(masm, 0x1, reg_encoding, base);
        emit_d8(masm, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) {     // Special flag for absolute address
          emit_rm(masm, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(masm, displace, disp_reloc, 1);
          } else {
            emit_d32      (masm, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(masm, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(masm, displace, disp_reloc, 1);
          } else {
            emit_d32      (masm, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(masm, 0x0, reg_encoding, 0x4);
      emit_rm(masm, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(masm, 0x1, reg_encoding, 0x4);
        emit_rm(masm, scale, index, base);
        emit_d8(masm, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(masm, 0x2, reg_encoding, 0x4);
          emit_rm(masm, scale, index, 0x04);
        } else {
          emit_rm(masm, 0x2, reg_encoding, 0x4);
          emit_rm(masm, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(masm, displace, disp_reloc, 1);
        } else {
          emit_d32      (masm, displace);
        }
      }
    }
  }
}


// Emit a 32-bit register-to-register MOV (0x8B /r); a self-move is elided.
void encode_Copy( C2_MacroAssembler *masm, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, dst_encoding, src_encoding );
  }
}

// Post-compare fixup so NaN operands produce a 'less than' flag result.
void emit_cmpfp_fixup(MacroAssembler* masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize the three-way result of a preceding unordered FP compare in
// dst: -1 for unordered (parity) or below, 0 for equal, 1 for greater.
static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog for -XX:+PrintAssembly; mirrors the code emitted
// by MachPrologNode::emit below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog for -XX:+PrintAssembly; mirrors the code emitted
// by MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL rsp, poll_offset[thread] \n\t"
              "JA #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(masm, 0x81); // add  SP, #framesize
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d32(masm, framesize);
  } else if (framesize) {
    emit_opcode(masm, 0x83); // add  SP, #framesize
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, framesize);
  }

  emit_opcode(masm, 0x58 | EBP_enc); // pop EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ set_inst_mark();
    __ relocate(relocInfo::poll_return_type);
    __ clear_inst_mark();
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//============================================================================= 733 734 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack }; 735 static enum RC rc_class( OptoReg::Name reg ) { 736 737 if( !OptoReg::is_valid(reg) ) return rc_bad; 738 if (OptoReg::is_stack(reg)) return rc_stack; 739 740 VMReg r = OptoReg::as_VMReg(reg); 741 if (r->is_Register()) return rc_int; 742 if (r->is_FloatRegister()) { 743 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 744 return rc_float; 745 } 746 if (r->is_KRegister()) return rc_kreg; 747 assert(r->is_XMMRegister(), "must be"); 748 return rc_xmm; 749 } 750 751 static int impl_helper( C2_MacroAssembler *masm, bool do_size, bool is_load, int offset, int reg, 752 int opcode, const char *op_str, int size, outputStream* st ) { 753 if( masm ) { 754 masm->set_inst_mark(); 755 emit_opcode (masm, opcode ); 756 encode_RegMem(masm, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); 757 masm->clear_inst_mark(); 758 #ifndef PRODUCT 759 } else if( !do_size ) { 760 if( size != 0 ) st->print("\n\t"); 761 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 762 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 763 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 764 } else { // FLD, FST, PUSH, POP 765 st->print("%s [ESP + #%d]",op_str,offset); 766 } 767 #endif 768 } 769 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 770 return size+3+offset_size; 771 } 772 773 // Helper for XMM registers. Extra opcode bits, limited syntax. 
// Spill load/store between an XMM register (or adjacent pair = double) and
// [ESP+offset].  Emits MOVSS/MOVSD via the macro assembler; returns the
// accumulated size, accounting for VEX/EVEX prefix length and EVEX
// compressed-disp8 displacements.
static int impl_x_helper( C2_MacroAssembler *masm, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    // Adjacent register pair => 64-bit (double) move.
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (masm) {
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    __ set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    // EVEX disp8*N compression may shrink a multi-byte displacement to one byte.
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy (single or adjacent-pair double); returns size.
static int impl_movx_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (masm) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// 32-bit GPR -> XMM copy via MOVD; returns the encoded size (absolute, not
// accumulated — callers reach here only when no bytes were emitted before).
static int impl_movgpr2x_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (masm) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// XMM -> 32-bit GPR copy via MOVD; returns the encoded size (absolute,
// see impl_movgpr2x_helper above).
static int impl_movx2gpr_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (masm) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer register-to-register copy: MOV r32, r/m32 (opcode 0x8B, mod=3).
static int impl_mov_helper( C2_MacroAssembler *masm, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( masm ) {
    emit_opcode(masm, 0x8B );
    emit_rm    (masm, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 FPU register to [ESP+offset].  If the source is not already
// on top of the FP stack it is FLD'ed first and stored with a popping
// FSTP; otherwise a non-popping FST is used.  Returns accumulated size.
static int impl_fp_store_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( masm ) {
      emit_opcode( masm, 0xD9 );  // FLD (i.e., push it)
      emit_d8( masm, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // st_op only selects which regName is printed by impl_helper; the real
  // opcode/reg field comes from 'op' below.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(masm,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st);

void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                      int stack_offset, int reg, uint ireg, outputStream* st);

// Vector mem->mem copy: VecS/VecD go through PUSH/POP of 32-bit halves;
// wider vectors bounce through xmm0, saving and restoring xmm0 below ESP.
static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (masm) {
    switch (ireg) {
    case Op_VecS:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX:
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY:
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      // NOTE(review): text says "popq" but the emit path above uses popl
      // on this 32-bit port — debug-format mismatch, confirm upstream.
      st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushl [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu xmm0, [rsp + #%d]\n\t"
                "movdqu [rsp + #%d], xmm0\n\t"
                "movdqu xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      // NOTE(review): text says "vmovdqu" while the emit path uses
      // evmovdquq for the 512-bit case — debug-format mismatch, confirm.
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}

// Central spill-copy dispatcher.  Classifies source/destination into
// int/float/xmm/kreg/stack and emits (masm != nullptr), formats
// (masm == nullptr, !do_size), or sizes the appropriate move sequence.
// Returns the number of bytes emitted (0 for the vector and kreg paths,
// which are sized elsewhere).
uint MachSpillCopyNode::implementation( C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // Vector spill-copies (but not vector-mask registers) are handled by the
  // shared vec_* helpers and always report size 0 here.
  if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high half first so the low-half copy does not clobber it.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(masm,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( src_first_rc == rc_stack && dst_first_rc == rc_int )
    size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( masm ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (masm, 0xD9 );           // FLD    ST(i)
        emit_d8      (masm, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (masm, 0xDD );           // FSTP   ST(i)
        emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode  (masm, 0xDD );           // FST    ST(i)
        emit_d8      (masm, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    // FLD+FSTP is two 2-byte instructions; a lone FST is one.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( masm ) {
      masm->set_inst_mark();
      emit_opcode  (masm, op );
      encode_RegMem(masm, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (masm, 0xDD );           // FSTP   ST(i)
      emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
      masm->clear_inst_mark();
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(masm,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
    return impl_x_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( masm ) {
      emit_opcode(masm,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(masm, 0x1, ESP_enc, 0x04);
      emit_rm(masm, 0x0, 0x04, ESP_enc);
      emit_d8(masm,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(masm,do_size,true ,0,dst_first, dst_second, size, st);

    if( masm ) {
      emit_opcode(masm,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(masm, 0x1, ESP_enc, 0x04);
      emit_rm(masm, 0x0, 0x04, ESP_enc);
      emit_d8(masm,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // AVX-512 opmask specific spilling.
  if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    int offset = ra_->reg2offset(src_first);
    if (masm != nullptr) {
      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
    } else {
      st->print("KMOV %s, [ESP + %d]", Matcher::regName[dst_first], offset);
#endif
    }
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    int offset = ra_->reg2offset(dst_first);
    if (masm != nullptr) {
      __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else {
      st->print("KMOV [ESP + %d], %s", offset, Matcher::regName[src_first]);
#endif
    }
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    if (masm != nullptr) {
      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else {
      st->print("KMOV %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
#endif
    }
    return 0;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(masm,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( nullptr, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  implementation( masm, ra_, false, nullptr );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}


//=============================================================================
#ifndef PRODUCT
// Print the LEA that materializes the address of an on-stack lock box.
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Emit LEA reg,[ESP+offset] for the lock-box address (see size() below for
// the matching 8/32-bit displacement size accounting).
void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg =
ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(masm, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(masm, 0x2, reg, 0x04);
    emit_rm(masm, 0x0, 0x04, ESP_enc);
    emit_d32(masm, offset);
  }
  else {
    emit_opcode(masm, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(masm, 0x1, reg, 0x04);
    emit_rm(masm, 0x0, 0x04, ESP_enc);
    emit_d8(masm, offset);
  }
}

// 7 bytes with a 32-bit displacement, 4 with an 8-bit one — must match the
// two LEA encodings emitted above.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
// Print the unverified entry point (inline-cache check) sequence.
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  __ ic_check(CodeEntryAlignment);
}

uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}


//=============================================================================

// Vector calling convention not supported.
bool Matcher::supports_vector_calling_convention() {
  return false;
}

OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  Unimplemented();
  return OptoRegPair(0, 0);
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

// Register-pressure thresholds for the allocator; the command-line flags
// override the default of 6 when set to a value other than -1.
uint Matcher::int_pressure_limit()
{
  return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
}

uint Matcher::float_pressure_limit()
{
  return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL (no 64-bit divmod node on x86_32)
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL (no 64-bit divmod node on x86_32)
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// True for an AndL with a constant mask clearing the high word, and for a
// ConL constant whose high word is zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
MEMORY_INTER causes an 1506 // operand to generate four functions which return the Base Register, the 1507 // Index Register, the Scale Value, and the Offset Value of the operand when 1508 // queried. COND_INTER causes an operand to generate six functions which 1509 // return the encoding code (ie - encoding bits for the instruction) 1510 // associated with each basic boolean condition for a conditional instruction. 1511 // Instructions specify two basic values for encoding. They use the 1512 // ins_encode keyword to specify their encoding class (which must be one of 1513 // the class names specified in the encoding block), and they use the 1514 // opcode keyword to specify, in order, their primary, secondary, and 1515 // tertiary opcode. Only the opcode sections which a particular instruction 1516 // needs for encoding need to be specified. 1517 encode %{ 1518 // Build emit functions for each basic byte or larger field in the intel 1519 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1520 // code in the enc_class source block. Emit functions will live in the 1521 // main source block for now. In future, we can generalize this by 1522 // adding a syntax that specifies the sizes of fields in an order, 1523 // so that the adlc can build the emit functions automagically 1524 1525 // Set instruction mark in MacroAssembler. This is used only in 1526 // instructions that emit bytes directly to the CodeBuffer wraped 1527 // in the MacroAssembler. Should go away once all "instruct" are 1528 // patched to emit bytes only using methods in MacroAssembler. 
  // Mark the start of the current instruction in the MacroAssembler
  // (needed for relocation bookkeeping of raw-byte emission).
  enc_class SetInstMark %{
    __ set_inst_mark();
  %}

  enc_class ClearInstMark %{
    __ clear_inst_mark();
  %}

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(masm, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(masm, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(masm, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix (16-bit operand follows)
  enc_class SizePrefix %{
    emit_opcode(masm,0x66);
  %}

  // Emit mod/rm byte for a register-register form (mod == 0x3)
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(masm,$opcode$$constant);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  // Load zero into a 32-bit register with MOV r32,imm32
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( masm, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32   ( masm, 0x0  );             // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                        special case
    //
    // input : rax,: dividend                     min_int
    //         reg:  divisor                      -1
    //
    // output: rax,: quotient  (= rax, idiv reg)  min_int
    //         rdx:  remainder (= rax, irem reg)  0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80    cmp     rax,80000000h
    // 0F 85 0B 00 00 00    jne     normal_case
    // 33 D2                xor     rdx,edx
    // 83 F9 FF             cmp     rcx,0FFh
    // 0F 84 03 00 00 00    je      done
    //                  normal_case:
    // 99                   cdq
    // F7 F9                idiv    rax,ecx
    //                  done:
    //
    emit_opcode(masm,0x81); emit_d8(masm,0xF8);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x80);  // cmp rax,80000000h
    emit_opcode(masm,0x0F); emit_d8(masm,0x85);
    emit_opcode(masm,0x0B); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);  // jne normal_case
    emit_opcode(masm,0x33); emit_d8(masm,0xD2);  // xor rdx,edx
    emit_opcode(masm,0x83); emit_d8(masm,0xF9); emit_d8(masm,0xFF);  // cmp rcx,0FFh
    emit_opcode(masm,0x0F); emit_d8(masm,0x84);
    emit_opcode(masm,0x03); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);  // je done
    // normal_case:
    emit_opcode(masm,0x99);                      // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(masm, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(masm, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(masm, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(masm, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(masm, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(masm, 0x3, $secondary, $dst$$reg);
  %}

  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Low 32-bit half of a long op with immediate: opcode + r/m + imm8/imm32
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(masm, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
    else                               emit_d32(masm,con);
  %}

  // High 32-bit half of a long op with immediate; uses the tertiary opcode
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(masm, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
    else                               emit_d32(masm,con);
  %}

  enc_class OpcSReg (rRegI dst) %{    // BSWAP
    emit_cc(masm, $secondary, $dst$$reg );
  %}

  // Byte-swap a 64-bit value: bswap both halves, then exchange them
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW_ENC(destlo);
    // bswap lo
    emit_opcode(masm, 0x0F);
    emit_cc(masm, 0xC8, destlo);
    // bswap hi
    emit_opcode(masm, 0x0F);
    emit_cc(masm, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(masm, 0x87);
    emit_rm(masm, 0x3, destlo, desthi);
  %}

  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(masm, 0x3, $secondary, $div$$reg );
  %}

  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(masm, $secondary, $cop$$cmpcode);
  %}

  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(masm, op >> 8 );
    emit_d8(masm, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( masm, 0x70, ($cop$$cmpcode^1) );
    emit_d8( masm, $brOffs$$constant );
  %}

  // Slow-path subtype check; on a hit falls through with EDI cleared
  // when $primary is set, otherwise lands on 'miss' with EDI != 0.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     nullptr, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  // Empty the x87 stack before a C call (pre-SSE2 only); emitted size must
  // be constant, checked/recorded via sizeof_FFree_Float_Stack_All.
  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    int start = __ offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        __ verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      __ empty_FPU_stack();
    }
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = __ offset() - start;
    } else {
      assert(__ offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      __ verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  // CALL into the runtime; in SSE2+ mode move any x87 float/double result
  // into xmm0 (or drop it if unused) since C returns FP values on the x87 stack.
  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    __ set_inst_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();

    if (UseSSE >= 2) {
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}

  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = __ offset());
    if (ra_->C->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
    debug_only(int off1 = __ offset());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}

  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    __ set_inst_mark();
    $$$emit8$primary;

    if (!_method) {
      emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_IMM32);
      __ clear_inst_mark();
      __ post_call_nop();
    } else {
      int method_index = resolved_method_index(masm);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                     rspec, RELOC_DISP32);
      __ post_call_nop();
      address mark = __ inst_mark();
      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
        // Calls of the same statically bound method can share
        // a stub to the interpreter.
        __ code()->shared_stub_to_interp_for(_method, __ code()->insts()->mark_off());
        __ clear_inst_mark();
      } else {
        // Emit stubs for static call.
        address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
        __ clear_inst_mark();
        if (stub == nullptr) {
          ciEnv::current()->record_failure("CodeCache is full");
          return;
        }
      }
    }
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
    __ set_inst_mark();
    $$$emit8$primary;
    emit_rm(masm, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(masm, disp);                        // Displacement
    __ clear_inst_mark();
    __ post_call_nop();
  %}

  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(masm, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(masm, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(masm, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load the low 32 bits of a long immediate; zero constant uses the
  // shorter XOR dst,dst form.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(masm, 0x33);
      emit_rm(masm, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(masm, $primary + dst_enc);
      emit_d32(masm, src_con);
    }
  %}

  // Load the high 32 bits of a long immediate into the paired register
  // (encoding of the pair's high half is dst+2 here).
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(masm, 0x33);
      emit_rm(masm, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(masm, $primary + dst_enc);
      emit_d32(masm, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( masm, $dst$$reg, $src$$reg );
  %}

  // Copy the low word of a long pair into an integer register
  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( masm, $dst$$reg, $src$$reg );
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
  %}

  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
  %}

  // r/m byte pairing an integer register with the high half of a long
  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(masm, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(masm, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  // Emit a raw 32-bit immediate
  enc_class Con_d32(immI src) %{
    emit_d32(masm,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(masm, 0x00, $t1$$reg, 0x05 );
    emit_d32(masm, 0x00);
  %}

  enc_class lock_prefix( ) %{
    emit_opcode(masm,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(masm,0x87);
    emit_opcode(masm,0xD9);
    // [Lock]
    emit_opcode(masm,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xC7);
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(masm,0x87);
    emit_opcode(masm,0xD9);
  %}

  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(masm,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xB1);
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
  %}

  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(masm,0xF0);

    // CMPXCHGB [Eptr]
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xB0);
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
  %}

  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(masm,0xF0);

    // 16-bit mode
    emit_opcode(masm, 0x66);

    // CMPXCHGW [Eptr]
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xB1);
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize ZF!=0 as a 0/1 boolean: res=0, skip over "res=1" when ZF set
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( masm, 0xB8 + res_encoding);
    emit_d32( masm, 0 );
    // JNE,s  fail
    emit_opcode(masm,0x75);
    emit_d8(masm, 5 );
    // MOV  res,1
    emit_opcode( masm, 0xB8 + res_encoding);
    emit_d32( masm, 1 );
    // fail:
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(masm, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Long shift by 1..31: SHLD/SHRD ($tertiary) across the pair, then
  // shift ($primary/$secondary) of the remaining half.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
    emit_opcode(masm,0x0F);
    emit_opcode(masm,$tertiary);
    emit_rm(masm, 0x3, r1, r2);
    emit_d8(masm,$cnt$$constant);
    emit_d8(masm,$primary);
    emit_rm(masm, 0x3, $secondary, r1);
    emit_d8(masm,$cnt$$constant);
  %}

  // Arithmetic long shift right by 32..63: move hi to lo, shift lo by
  // cnt-32, then sign-fill hi with a 31-bit shift.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( masm, 0x8B ); // Move
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(masm,$primary);
      emit_rm(masm, 0x3, $secondary, $dst$$reg);
      emit_d8(masm,$cnt$$constant-32);
    }
    emit_d8(masm,$primary);
    emit_rm(masm, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_d8(masm,31);
  %}

  // Logical long shift by 32..63: move one half, shift it by cnt-32,
  // then zero the other half.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }

    emit_opcode( masm, 0x8B ); // Move r1,r2
    emit_rm(masm, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(masm,$primary);
      emit_rm(masm, 0x3, $secondary, r1);
      emit_d8(masm,$cnt$$constant-32);
    }
    emit_opcode(masm,0x33);  // XOR r2,r2
    emit_rm(masm, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(masm,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // dst = min(dst,src): compare, branch around the MOV when dst < src
  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(masm,0x3B);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(masm,0x7C);
    emit_d8(masm,2);
    // move dst,src
    emit_opcode(masm,0x8B);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  // dst = max(dst,src): compare, branch around the MOV when dst > src
  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(masm,0x3B);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(masm,0x7F);
    emit_d8(masm,2);
    // move dst,src
    emit_opcode(masm,0x8B);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;        // Store & pop
      emit_opcode( masm, 0xD9 ); // FLD (i.e., push it)
      emit_d8( masm, 0xC0-1+$src$$reg );
    }
    __ set_inst_mark();          // Mark start of opcode for reloc info in mem operand
    emit_opcode(masm,$primary);
    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
    __ clear_inst_mark();
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(masm,0xF7);
    emit_rm(masm, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0x9C);
    emit_rm( masm, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free p += (p < q) ? y : 0 via SUB/SBB mask trick
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(masm,0x2B);
    emit_rm(masm, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(masm,0x1B);
    emit_rm(masm, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(masm,0x23);
    emit_rm(masm, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(masm,0x03);
    emit_rm(masm, 0x3, $p$$reg, tmpReg);
  %}

  // Variable 64-bit left shift: handle shift>=32 by moving lo into hi
  // and clearing lo, then SHLD/SHL by the (mod-32) count in CL.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(masm,0xF7);
    emit_rm(masm, 0x3, 0, ECX_enc);
    emit_d32(masm,0x20);
    // JEQ,s small
    emit_opcode(masm, 0x74);
    emit_d8(masm, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(masm, 0x33);
    emit_rm(masm, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xA5);
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    // SHL $dst.lo,$shift"
    emit_opcode(masm,0xD3);
    emit_rm(masm, 0x3, 0x4, $dst$$reg );
  %}

  // Variable 64-bit logical right shift: mirror of shift_left_long
  // using SHRD/SHR and zero-fill of the high half.
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(masm,0xF7);
    emit_rm(masm, 0x3, 0, ECX_enc);
    emit_d32(masm,0x20);
    // JEQ,s small
    emit_opcode(masm, 0x74);
    emit_d8(masm, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(masm, 0x33);
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xAD);
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(masm,0xD3);
    emit_rm(masm, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
  %}

  // Variable 64-bit arithmetic right shift: like shift_right_long but the
  // high half is sign-filled with SAR hi,31 instead of cleared.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(masm,0xF7);
    emit_rm(masm, 0x3, 0, ECX_enc);
    emit_d32(masm,0x20);
    // JEQ,s small
    emit_opcode(masm, 0x74);
    emit_d8(masm, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(masm, 0xC1);
    emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
    emit_d8(masm, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xAD);
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(masm,0xD3);
    emit_rm(masm, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes

  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(masm, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( masm, 0xDD );
    emit_d8( masm, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( masm, 0xDD );           // FSTP   ST(i)
    emit_d8( masm, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( masm, 0xD9 );
    emit_d8( masm, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // strictfp: multiply by the bias-1 80-bit constant to rescale subnormals
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( masm, 0xDB );           // FLD m80real
    emit_opcode( masm, 0x2D );
    emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
    emit_opcode( masm, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( masm, 0xC8+$dst$$reg );
  %}

  // strictfp: multiply by the bias-2 80-bit constant to undo the rescale
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( masm, 0xDB );           // FLD m80real
    emit_opcode( masm, 0x2D );
    emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
    emit_opcode( masm, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( masm, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( masm, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( masm, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( masm, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(masm, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( masm, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( masm, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( masm, 0xDD, 0x03, $dst$$disp );
  %}

  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( masm, 0xDD );           // FSTP   ST(i)
    emit_d8( masm, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( masm, 0xD9 );           // FLD    ST(i-1)
    emit_d8( masm, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;                      // FST (no pop) when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( masm, 0xD9 );         // FLD    ST(i-1)
      emit_d8( masm, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP: drop the copy we just pushed
    }
    store_to_stackslot( masm, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;                      // FST (no pop) when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( masm, 0xD9 );         // FLD    ST(i-1)
      emit_d8( masm, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP: drop the copy we just pushed
    }
    store_to_stackslot( masm, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( masm, 0xD9 );         // FLD    ST(src-1)
      emit_d8( masm, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( masm, 0xDD );
    emit_d8( masm, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  // Load dst onto TOS; if src is not FPR1 rotate it into FPR1 so a
  // following two-operand x87 op pairs dst(TOS) with src(FPR1).
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( masm, 0xD9 );
    emit_d8( masm, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (masm, 0xD9);
      emit_opcode (masm, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(masm, 0xD9);
      emit_d8(masm, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (masm, 0xD9);
      emit_opcode (masm, 0xF6);
    }
  %}

  // Spill two XMM doubles through a stack qword and push both onto the
  // x87 stack; src1 is pushed first so src0 ends up on TOS.
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Float variant of Push_ModD_encoding (4-byte scratch area)
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the x87 TOS double into an XMM register via the stack scratch qword
  enc_class Push_ResultD(regD dst) %{
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Pop the x87 TOS float into an XMM register; d8 is the scratch size to release
  enc_class Push_ResultF(regF dst, immI d8) %{
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  // Push one XMM double onto the x87 stack through a fresh stack qword
  enc_class Push_SrcD(regD src) %{
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Reserve an 8-byte scratch area on the stack
  enc_class push_stack_temp_qword() %{
    __ subptr(rsp, 8);
  %}
  // Release the 8-byte scratch area reserved by push_stack_temp_qword
  enc_class pop_stack_temp_qword() %{
    __ addptr(rsp, 8);
  %}

  // Copy an XMM double onto the x87 stack through the existing stack scratch qword
  enc_class push_xmm_to_fpr1(regD src) %{
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Rotate the x87 stack so the TOS result lands in src's slot
  // (fincstp / FXCH / fdecstp), unless src is already FPR1.
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (masm, 0xD9);
      emit_opcode (masm, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(masm, 0xD9);
      emit_d8(masm, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (masm, 0xD9);
      emit_opcode (masm, 0xF6);
    }
  %}

  // Copy FPU status to EFLAGS and skip the following 5 bytes unless
  // the parity flag (unordered compare) is set.
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( masm, 0xDF );
    emit_opcode( masm, 0xE0 );
    // sahf
    emit_opcode( masm, 0x9E );
    // jnp  ::skip
    emit_opcode( masm, 0x7B );
    emit_opcode( masm, 0x05 );
  %}

  // FPREM loop: repeat until the C2 status bit (reduction incomplete,
  // surfaced as parity after sahf) clears.
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( masm, 0xD9 );
    emit_opcode( masm, 0xF8 );
    // wait
    emit_opcode( masm, 0x9b );
    // fnstsw ax
    emit_opcode( masm, 0xDF );
    emit_opcode( masm, 0xE0 );
    // sahf
    emit_opcode( masm, 0x9E );
    // jp  ::loop
    emit_opcode( masm, 0x0F );
    emit_opcode( masm, 0x8A );
    emit_opcode( masm, 0xF4 );
    emit_opcode( masm, 0xFF );
    emit_opcode( masm, 0xFF );
    emit_opcode( masm, 0xFF );
  %}

  // Convert FPU compare status into integer flags, forcing the
  // unordered (NaN) case to look like LT.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( masm, 0xDF);
    emit_opcode( masm, 0xE0);
    // test ax,0x0400
    emit_opcode( masm, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( masm, 0xA9 );
    emit_d16   ( masm, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( masm, 0xA9 );
    // emit_d32  ( masm, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( masm, 0x74 );
    emit_d8    ( masm, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( masm, 0xB4 );
    emit_d8    ( masm, 0x01 );
    // sahf
    emit_opcode( masm, 0x9E);
  %}

  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( masm, 0x7B );
    emit_d8    ( masm, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( masm, 0xB4 );
    emit_d8    ( masm, 0x01 );
    // SAHF
    emit_opcode( masm, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( masm, 0x90);
  %}

  // fnstsw_ax();
  // sahf();
  // movl(dst, nan_result);
  // jcc(Assembler::parity, exit);
  // movl(dst, less_result);
  // jcc(Assembler::below, exit);
  // movl(dst, equal_result);
  // jcc(Assembler::equal, exit);
  // movl(dst, greater_result);

  // less_result     =  1;
  // greater_result  = -1;
  // equal_result    = 0;
  // nan_result      = -1;

  // Materialize a three-way float compare result (-1/0/1, NaN -> -1)
  // following the pseudo-code above; branch offsets are hand-counted.
  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( masm, 0xDF);
    emit_opcode( masm, 0xE0);
    // sahf
    emit_opcode( masm, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( masm, 0xB8 + $dst$$reg);
    emit_d32( masm, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( masm, 0x7A );
    emit_d8    ( masm, 0x13 );
    // movl(dst, less_result);
    emit_opcode( masm, 0xB8 + $dst$$reg);
    emit_d32( masm, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( masm, 0x72 );
    emit_d8    ( masm, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( masm, 0xB8 + $dst$$reg);
    emit_d32( masm, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( masm, 0x74 );
    emit_d8    ( masm, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( masm, 0xB8 + $dst$$reg);
    emit_d32( masm, 1 );
  %}


  // Compare the longs and set flags
  // BROKEN!
Do Not use as-is 2623 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2624 // CMP $src1.hi,$src2.hi 2625 emit_opcode( masm, 0x3B ); 2626 emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) ); 2627 // JNE,s done 2628 emit_opcode(masm,0x75); 2629 emit_d8(masm, 2 ); 2630 // CMP $src1.lo,$src2.lo 2631 emit_opcode( masm, 0x3B ); 2632 emit_rm(masm, 0x3, $src1$$reg, $src2$$reg ); 2633 // done: 2634 %} 2635 2636 enc_class convert_int_long( regL dst, rRegI src ) %{ 2637 // mov $dst.lo,$src 2638 int dst_encoding = $dst$$reg; 2639 int src_encoding = $src$$reg; 2640 encode_Copy( masm, dst_encoding , src_encoding ); 2641 // mov $dst.hi,$src 2642 encode_Copy( masm, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding ); 2643 // sar $dst.hi,31 2644 emit_opcode( masm, 0xC1 ); 2645 emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) ); 2646 emit_d8(masm, 0x1F ); 2647 %} 2648 2649 enc_class convert_long_double( eRegL src ) %{ 2650 // push $src.hi 2651 emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg)); 2652 // push $src.lo 2653 emit_opcode(masm, 0x50+$src$$reg ); 2654 // fild 64-bits at [SP] 2655 emit_opcode(masm,0xdf); 2656 emit_d8(masm, 0x6C); 2657 emit_d8(masm, 0x24); 2658 emit_d8(masm, 0x00); 2659 // pop stack 2660 emit_opcode(masm, 0x83); // add SP, #8 2661 emit_rm(masm, 0x3, 0x00, ESP_enc); 2662 emit_d8(masm, 0x8); 2663 %} 2664 2665 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2666 // IMUL EDX:EAX,$src1 2667 emit_opcode( masm, 0xF7 ); 2668 emit_rm( masm, 0x3, 0x5, $src1$$reg ); 2669 // SAR EDX,$cnt-32 2670 int shift_count = ((int)$cnt$$constant) - 32; 2671 if (shift_count > 0) { 2672 emit_opcode(masm, 0xC1); 2673 emit_rm(masm, 0x3, 7, $dst$$reg ); 2674 emit_d8(masm, shift_count); 2675 } 2676 %} 2677 2678 // this version doesn't have add sp, 8 2679 enc_class convert_long_double2( eRegL src ) %{ 2680 // push $src.hi 2681 emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg)); 
2682 // push $src.lo 2683 emit_opcode(masm, 0x50+$src$$reg ); 2684 // fild 64-bits at [SP] 2685 emit_opcode(masm,0xdf); 2686 emit_d8(masm, 0x6C); 2687 emit_d8(masm, 0x24); 2688 emit_d8(masm, 0x00); 2689 %} 2690 2691 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2692 // Basic idea: long = (long)int * (long)int 2693 // IMUL EDX:EAX, src 2694 emit_opcode( masm, 0xF7 ); 2695 emit_rm( masm, 0x3, 0x5, $src$$reg); 2696 %} 2697 2698 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2699 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2700 // MUL EDX:EAX, src 2701 emit_opcode( masm, 0xF7 ); 2702 emit_rm( masm, 0x3, 0x4, $src$$reg); 2703 %} 2704 2705 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2706 // Basic idea: lo(result) = lo(x_lo * y_lo) 2707 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2708 // MOV $tmp,$src.lo 2709 encode_Copy( masm, $tmp$$reg, $src$$reg ); 2710 // IMUL $tmp,EDX 2711 emit_opcode( masm, 0x0F ); 2712 emit_opcode( masm, 0xAF ); 2713 emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2714 // MOV EDX,$src.hi 2715 encode_Copy( masm, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) ); 2716 // IMUL EDX,EAX 2717 emit_opcode( masm, 0x0F ); 2718 emit_opcode( masm, 0xAF ); 2719 emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg ); 2720 // ADD $tmp,EDX 2721 emit_opcode( masm, 0x03 ); 2722 emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2723 // MUL EDX:EAX,$src.lo 2724 emit_opcode( masm, 0xF7 ); 2725 emit_rm( masm, 0x3, 0x4, $src$$reg ); 2726 // ADD EDX,ESI 2727 emit_opcode( masm, 0x03 ); 2728 emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg ); 2729 %} 2730 2731 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2732 // Basic idea: lo(result) = lo(src * y_lo) 2733 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2734 // IMUL $tmp,EDX,$src 2735 emit_opcode( masm, 0x6B ); 2736 emit_rm( masm, 0x3, $tmp$$reg, 
HIGH_FROM_LOW_ENC($dst$$reg) ); 2737 emit_d8( masm, (int)$src$$constant ); 2738 // MOV EDX,$src 2739 emit_opcode(masm, 0xB8 + EDX_enc); 2740 emit_d32( masm, (int)$src$$constant ); 2741 // MUL EDX:EAX,EDX 2742 emit_opcode( masm, 0xF7 ); 2743 emit_rm( masm, 0x3, 0x4, EDX_enc ); 2744 // ADD EDX,ESI 2745 emit_opcode( masm, 0x03 ); 2746 emit_rm( masm, 0x3, EDX_enc, $tmp$$reg ); 2747 %} 2748 2749 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2750 // PUSH src1.hi 2751 emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) ); 2752 // PUSH src1.lo 2753 emit_opcode(masm, 0x50+$src1$$reg ); 2754 // PUSH src2.hi 2755 emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) ); 2756 // PUSH src2.lo 2757 emit_opcode(masm, 0x50+$src2$$reg ); 2758 // CALL directly to the runtime 2759 __ set_inst_mark(); 2760 emit_opcode(masm,0xE8); // Call into runtime 2761 emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2762 __ clear_inst_mark(); 2763 __ post_call_nop(); 2764 // Restore stack 2765 emit_opcode(masm, 0x83); // add SP, #framesize 2766 emit_rm(masm, 0x3, 0x00, ESP_enc); 2767 emit_d8(masm, 4*4); 2768 %} 2769 2770 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2771 // PUSH src1.hi 2772 emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) ); 2773 // PUSH src1.lo 2774 emit_opcode(masm, 0x50+$src1$$reg ); 2775 // PUSH src2.hi 2776 emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) ); 2777 // PUSH src2.lo 2778 emit_opcode(masm, 0x50+$src2$$reg ); 2779 // CALL directly to the runtime 2780 __ set_inst_mark(); 2781 emit_opcode(masm,0xE8); // Call into runtime 2782 emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2783 __ clear_inst_mark(); 2784 __ post_call_nop(); 2785 // Restore stack 2786 emit_opcode(masm, 0x83); // add SP, #framesize 2787 emit_rm(masm, 0x3, 0x00, ESP_enc); 2788 emit_d8(masm, 4*4); 2789 %} 2790 2791 enc_class 
long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2792 // MOV $tmp,$src.lo 2793 emit_opcode(masm, 0x8B); 2794 emit_rm(masm, 0x3, $tmp$$reg, $src$$reg); 2795 // OR $tmp,$src.hi 2796 emit_opcode(masm, 0x0B); 2797 emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg)); 2798 %} 2799 2800 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2801 // CMP $src1.lo,$src2.lo 2802 emit_opcode( masm, 0x3B ); 2803 emit_rm(masm, 0x3, $src1$$reg, $src2$$reg ); 2804 // JNE,s skip 2805 emit_cc(masm, 0x70, 0x5); 2806 emit_d8(masm,2); 2807 // CMP $src1.hi,$src2.hi 2808 emit_opcode( masm, 0x3B ); 2809 emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) ); 2810 %} 2811 2812 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2813 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2814 emit_opcode( masm, 0x3B ); 2815 emit_rm(masm, 0x3, $src1$$reg, $src2$$reg ); 2816 // MOV $tmp,$src1.hi 2817 emit_opcode( masm, 0x8B ); 2818 emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) ); 2819 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2820 emit_opcode( masm, 0x1B ); 2821 emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) ); 2822 %} 2823 2824 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2825 // XOR $tmp,$tmp 2826 emit_opcode(masm,0x33); // XOR 2827 emit_rm(masm,0x3, $tmp$$reg, $tmp$$reg); 2828 // CMP $tmp,$src.lo 2829 emit_opcode( masm, 0x3B ); 2830 emit_rm(masm, 0x3, $tmp$$reg, $src$$reg ); 2831 // SBB $tmp,$src.hi 2832 emit_opcode( masm, 0x1B ); 2833 emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) ); 2834 %} 2835 2836 // Sniff, sniff... 
smells like Gnu Superoptimizer 2837 enc_class neg_long( eRegL dst ) %{ 2838 emit_opcode(masm,0xF7); // NEG hi 2839 emit_rm (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg)); 2840 emit_opcode(masm,0xF7); // NEG lo 2841 emit_rm (masm,0x3, 0x3, $dst$$reg ); 2842 emit_opcode(masm,0x83); // SBB hi,0 2843 emit_rm (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg)); 2844 emit_d8 (masm,0 ); 2845 %} 2846 2847 enc_class enc_pop_rdx() %{ 2848 emit_opcode(masm,0x5A); 2849 %} 2850 2851 enc_class enc_rethrow() %{ 2852 __ set_inst_mark(); 2853 emit_opcode(masm, 0xE9); // jmp entry 2854 emit_d32_reloc(masm, (int)OptoRuntime::rethrow_stub() - ((int)__ pc())-4, 2855 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2856 __ clear_inst_mark(); 2857 __ post_call_nop(); 2858 %} 2859 2860 2861 // Convert a double to an int. Java semantics require we do complex 2862 // manglelations in the corner cases. So we set the rounding mode to 2863 // 'zero', store the darned double down as an int, and reset the 2864 // rounding mode to 'nearest'. The hardware throws an exception which 2865 // patches up the correct value directly to the stack. 2866 enc_class DPR2I_encoding( regDPR src ) %{ 2867 // Flip to round-to-zero mode. We attempted to allow invalid-op 2868 // exceptions here, so that a NAN or other corner-case value will 2869 // thrown an exception (but normal values get converted at full speed). 2870 // However, I2C adapters and other float-stack manglers leave pending 2871 // invalid-op exceptions hanging. We would have to clear them before 2872 // enabling them and that is more expensive than just testing for the 2873 // invalid value Intel stores down in the corner cases. 2874 emit_opcode(masm,0xD9); // FLDCW trunc 2875 emit_opcode(masm,0x2D); 2876 emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()); 2877 // Allocate a word 2878 emit_opcode(masm,0x83); // SUB ESP,4 2879 emit_opcode(masm,0xEC); 2880 emit_d8(masm,0x04); 2881 // Encoding assumes a double has been pushed into FPR0. 
2882 // Store down the double as an int, popping the FPU stack 2883 emit_opcode(masm,0xDB); // FISTP [ESP] 2884 emit_opcode(masm,0x1C); 2885 emit_d8(masm,0x24); 2886 // Restore the rounding mode; mask the exception 2887 emit_opcode(masm,0xD9); // FLDCW std/24-bit mode 2888 emit_opcode(masm,0x2D); 2889 emit_d32( masm, Compile::current()->in_24_bit_fp_mode() 2890 ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24() 2891 : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std()); 2892 2893 // Load the converted int; adjust CPU stack 2894 emit_opcode(masm,0x58); // POP EAX 2895 emit_opcode(masm,0x3D); // CMP EAX,imm 2896 emit_d32 (masm,0x80000000); // 0x80000000 2897 emit_opcode(masm,0x75); // JNE around_slow_call 2898 emit_d8 (masm,0x07); // Size of slow_call 2899 // Push src onto stack slow-path 2900 emit_opcode(masm,0xD9 ); // FLD ST(i) 2901 emit_d8 (masm,0xC0-1+$src$$reg ); 2902 // CALL directly to the runtime 2903 __ set_inst_mark(); 2904 emit_opcode(masm,0xE8); // Call into runtime 2905 emit_d32_reloc(masm, (StubRoutines::x86::d2i_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2906 __ clear_inst_mark(); 2907 __ post_call_nop(); 2908 // Carry on here... 2909 %} 2910 2911 enc_class DPR2L_encoding( regDPR src ) %{ 2912 emit_opcode(masm,0xD9); // FLDCW trunc 2913 emit_opcode(masm,0x2D); 2914 emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()); 2915 // Allocate a word 2916 emit_opcode(masm,0x83); // SUB ESP,8 2917 emit_opcode(masm,0xEC); 2918 emit_d8(masm,0x08); 2919 // Encoding assumes a double has been pushed into FPR0. 2920 // Store down the double as a long, popping the FPU stack 2921 emit_opcode(masm,0xDF); // FISTP [ESP] 2922 emit_opcode(masm,0x3C); 2923 emit_d8(masm,0x24); 2924 // Restore the rounding mode; mask the exception 2925 emit_opcode(masm,0xD9); // FLDCW std/24-bit mode 2926 emit_opcode(masm,0x2D); 2927 emit_d32( masm, Compile::current()->in_24_bit_fp_mode() 2928 ? 
(int)StubRoutines::x86::addr_fpu_cntrl_wrd_24() 2929 : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std()); 2930 2931 // Load the converted int; adjust CPU stack 2932 emit_opcode(masm,0x58); // POP EAX 2933 emit_opcode(masm,0x5A); // POP EDX 2934 emit_opcode(masm,0x81); // CMP EDX,imm 2935 emit_d8 (masm,0xFA); // rdx 2936 emit_d32 (masm,0x80000000); // 0x80000000 2937 emit_opcode(masm,0x75); // JNE around_slow_call 2938 emit_d8 (masm,0x07+4); // Size of slow_call 2939 emit_opcode(masm,0x85); // TEST EAX,EAX 2940 emit_opcode(masm,0xC0); // 2/rax,/rax, 2941 emit_opcode(masm,0x75); // JNE around_slow_call 2942 emit_d8 (masm,0x07); // Size of slow_call 2943 // Push src onto stack slow-path 2944 emit_opcode(masm,0xD9 ); // FLD ST(i) 2945 emit_d8 (masm,0xC0-1+$src$$reg ); 2946 // CALL directly to the runtime 2947 __ set_inst_mark(); 2948 emit_opcode(masm,0xE8); // Call into runtime 2949 emit_d32_reloc(masm, (StubRoutines::x86::d2l_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2950 __ clear_inst_mark(); 2951 __ post_call_nop(); 2952 // Carry on here... 
2953 %} 2954 2955 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 2956 // Operand was loaded from memory into fp ST (stack top) 2957 // FMUL ST,$src /* D8 C8+i */ 2958 emit_opcode(masm, 0xD8); 2959 emit_opcode(masm, 0xC8 + $src1$$reg); 2960 %} 2961 2962 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 2963 // FADDP ST,src2 /* D8 C0+i */ 2964 emit_opcode(masm, 0xD8); 2965 emit_opcode(masm, 0xC0 + $src2$$reg); 2966 //could use FADDP src2,fpST /* DE C0+i */ 2967 %} 2968 2969 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 2970 // FADDP src2,ST /* DE C0+i */ 2971 emit_opcode(masm, 0xDE); 2972 emit_opcode(masm, 0xC0 + $src2$$reg); 2973 %} 2974 2975 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 2976 // Operand has been loaded into fp ST (stack top) 2977 // FSUB ST,$src1 2978 emit_opcode(masm, 0xD8); 2979 emit_opcode(masm, 0xE0 + $src1$$reg); 2980 2981 // FDIV 2982 emit_opcode(masm, 0xD8); 2983 emit_opcode(masm, 0xF0 + $src2$$reg); 2984 %} 2985 2986 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 2987 // Operand was loaded from memory into fp ST (stack top) 2988 // FADD ST,$src /* D8 C0+i */ 2989 emit_opcode(masm, 0xD8); 2990 emit_opcode(masm, 0xC0 + $src1$$reg); 2991 2992 // FMUL ST,src2 /* D8 C*+i */ 2993 emit_opcode(masm, 0xD8); 2994 emit_opcode(masm, 0xC8 + $src2$$reg); 2995 %} 2996 2997 2998 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 2999 // Operand was loaded from memory into fp ST (stack top) 3000 // FADD ST,$src /* D8 C0+i */ 3001 emit_opcode(masm, 0xD8); 3002 emit_opcode(masm, 0xC0 + $src1$$reg); 3003 3004 // FMULP src2,ST /* DE C8+i */ 3005 emit_opcode(masm, 0xDE); 3006 emit_opcode(masm, 0xC8 + $src2$$reg); 3007 %} 3008 3009 // Atomically load the volatile long 3010 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3011 emit_opcode(masm,0xDF); 3012 int rm_byte_opcode = 0x05; 3013 int base = $mem$$base; 3014 int index = $mem$$index; 3015 int scale = $mem$$scale; 3016 int displace = $mem$$disp; 3017 relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); // disp-as-oop when working with static globals 3018 encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3019 store_to_stackslot( masm, 0x0DF, 0x07, $dst$$disp ); 3020 %} 3021 3022 // Volatile Store Long. Must be atomic, so move it into 3023 // the FP TOS and then do a 64-bit FIST. Has to probe the 3024 // target address before the store (for null-ptr checks) 3025 // so the memory operand is used twice in the encoding. 3026 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3027 store_to_stackslot( masm, 0x0DF, 0x05, $src$$disp ); 3028 __ set_inst_mark(); // Mark start of FIST in case $mem has an oop 3029 emit_opcode(masm,0xDF); 3030 int rm_byte_opcode = 0x07; 3031 int base = $mem$$base; 3032 int index = $mem$$index; 3033 int scale = $mem$$scale; 3034 int displace = $mem$$disp; 3035 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3036 encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3037 __ clear_inst_mark(); 3038 %} 3039 3040 %} 3041 3042 3043 //----------FRAME-------------------------------------------------------------- 3044 // Definition of frame structure and management information. 3045 // 3046 // S T A C K L A Y O U T Allocators stack-slot number 3047 // | (to get allocators register number 3048 // G Owned by | | v add OptoReg::stack0()) 3049 // r CALLER | | 3050 // o | +--------+ pad to even-align allocators stack-slot 3051 // w V | pad0 | numbers; owned by CALLER 3052 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned 3053 // h ^ | in | 5 3054 // | | args | 4 Holes in incoming args owned by SELF 3055 // | | | | 3 3056 // | | +--------+ 3057 // V | | old out| Empty on Intel, window on Sparc 3058 // | old |preserve| Must be even aligned. 3059 // | SP-+--------+----> Matcher::_old_SP, even aligned 3060 // | | in | 3 area for Intel ret address 3061 // Owned by |preserve| Empty on Sparc. 
3062 // SELF +--------+ 3063 // | | pad2 | 2 pad to align old SP 3064 // | +--------+ 1 3065 // | | locks | 0 3066 // | +--------+----> OptoReg::stack0(), even aligned 3067 // | | pad1 | 11 pad to align new SP 3068 // | +--------+ 3069 // | | | 10 3070 // | | spills | 9 spills 3071 // V | | 8 (pad0 slot for callee) 3072 // -----------+--------+----> Matcher::_out_arg_limit, unaligned 3073 // ^ | out | 7 3074 // | | args | 6 Holes in outgoing args owned by CALLEE 3075 // Owned by +--------+ 3076 // CALLEE | new out| 6 Empty on Intel, window on Sparc 3077 // | new |preserve| Must be even-aligned. 3078 // | SP-+--------+----> Matcher::_new_SP, even aligned 3079 // | | | 3080 // 3081 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is 3082 // known from SELF's arguments and the Java calling convention. 3083 // Region 6-7 is determined per call site. 3084 // Note 2: If the calling convention leaves holes in the incoming argument 3085 // area, those holes are owned by SELF. Holes in the outgoing area 3086 // are owned by the CALLEE. Holes should not be necessary in the 3087 // incoming area, as the Java calling convention is completely under 3088 // the control of the AD file. Doubles can be sorted and packed to 3089 // avoid holes. Holes in the outgoing arguments may be necessary for 3090 // varargs C calling conventions. 3091 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is 3092 // even aligned with pad0 as needed. 3093 // Region 6 is even aligned. Region 6-7 is NOT even aligned; 3094 // region 6-11 is even aligned; it may be padded out more so that 3095 // the region from SP to FP meets the minimum stack alignment. 3096 3097 frame %{ 3098 // These three registers define part of the calling convention 3099 // between compiled code and the interpreter. 
3100 inline_cache_reg(EAX); // Inline Cache Register 3101 3102 // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] 3103 cisc_spilling_operand_name(indOffset32); 3104 3105 // Number of stack slots consumed by locking an object 3106 sync_stack_slots(1); 3107 3108 // Compiled code's Frame Pointer 3109 frame_pointer(ESP); 3110 // Interpreter stores its frame pointer in a register which is 3111 // stored to the stack by I2CAdaptors. 3112 // I2CAdaptors convert from interpreted java to compiled java. 3113 interpreter_frame_pointer(EBP); 3114 3115 // Stack alignment requirement 3116 // Alignment size in bytes (128-bit -> 16 bytes) 3117 stack_alignment(StackAlignmentInBytes); 3118 3119 // Number of outgoing stack slots killed above the out_preserve_stack_slots 3120 // for calls to C. Supports the var-args backing area for register parms. 3121 varargs_C_out_slots_killed(0); 3122 3123 // The after-PROLOG location of the return address. Location of 3124 // return address specifies a type (REG or STACK) and a number 3125 // representing the register number (i.e. - use a register name) or 3126 // stack slot. 3127 // Ret Addr is on stack in slot 0 if no locks or verification or alignment. 
3128 // Otherwise, it is above the locks and verification slot and alignment word 3129 return_addr(STACK - 1 + 3130 align_up((Compile::current()->in_preserve_stack_slots() + 3131 Compile::current()->fixed_slots()), 3132 stack_alignment_in_slots())); 3133 3134 // Location of C & interpreter return values 3135 c_return_value %{ 3136 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3137 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3138 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3139 3140 // in SSE2+ mode we want to keep the FPU stack clean so pretend 3141 // that C functions return float and double results in XMM0. 3142 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3143 return OptoRegPair(XMM0b_num,XMM0_num); 3144 if( ideal_reg == Op_RegF && UseSSE>=2 ) 3145 return OptoRegPair(OptoReg::Bad,XMM0_num); 3146 3147 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3148 %} 3149 3150 // Location of return values 3151 return_value %{ 3152 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3153 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3154 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3155 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3156 return OptoRegPair(XMM0b_num,XMM0_num); 3157 if( ideal_reg == Op_RegF && UseSSE>=1 ) 3158 return OptoRegPair(OptoReg::Bad,XMM0_num); 3159 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3160 %} 3161 3162 %} 3163 3164 //----------ATTRIBUTES--------------------------------------------------------- 3165 //----------Operand Attributes------------------------------------------------- 3166 op_attrib op_cost(0); // Required cost attribute 3167 3168 //----------Instruction Attributes--------------------------------------------- 3169 ins_attrib 
ins_cost(100); // Required cost attribute 3170 ins_attrib ins_size(8); // Required size attribute (in bits) 3171 ins_attrib ins_short_branch(0); // Required flag: is this instruction a 3172 // non-matching short branch variant of some 3173 // long branch? 3174 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2) 3175 // specifies the alignment that some part of the instruction (not 3176 // necessarily the start) requires. If > 1, a compute_padding() 3177 // function must be provided for the instruction 3178 3179 //----------OPERANDS----------------------------------------------------------- 3180 // Operand definitions must precede instruction definitions for correct parsing 3181 // in the ADLC because operands constitute user defined types which are used in 3182 // instruction definitions. 3183 3184 //----------Simple Operands---------------------------------------------------- 3185 // Immediate Operands 3186 // Integer Immediate 3187 operand immI() %{ 3188 match(ConI); 3189 3190 op_cost(10); 3191 format %{ %} 3192 interface(CONST_INTER); 3193 %} 3194 3195 // Constant for test vs zero 3196 operand immI_0() %{ 3197 predicate(n->get_int() == 0); 3198 match(ConI); 3199 3200 op_cost(0); 3201 format %{ %} 3202 interface(CONST_INTER); 3203 %} 3204 3205 // Constant for increment 3206 operand immI_1() %{ 3207 predicate(n->get_int() == 1); 3208 match(ConI); 3209 3210 op_cost(0); 3211 format %{ %} 3212 interface(CONST_INTER); 3213 %} 3214 3215 // Constant for decrement 3216 operand immI_M1() %{ 3217 predicate(n->get_int() == -1); 3218 match(ConI); 3219 3220 op_cost(0); 3221 format %{ %} 3222 interface(CONST_INTER); 3223 %} 3224 3225 // Valid scale values for addressing modes 3226 operand immI2() %{ 3227 predicate(0 <= n->get_int() && (n->get_int() <= 3)); 3228 match(ConI); 3229 3230 format %{ %} 3231 interface(CONST_INTER); 3232 %} 3233 3234 operand immI8() %{ 3235 predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); 3236 match(ConI); 
3237 3238 op_cost(5); 3239 format %{ %} 3240 interface(CONST_INTER); 3241 %} 3242 3243 operand immU8() %{ 3244 predicate((0 <= n->get_int()) && (n->get_int() <= 255)); 3245 match(ConI); 3246 3247 op_cost(5); 3248 format %{ %} 3249 interface(CONST_INTER); 3250 %} 3251 3252 operand immI16() %{ 3253 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); 3254 match(ConI); 3255 3256 op_cost(10); 3257 format %{ %} 3258 interface(CONST_INTER); 3259 %} 3260 3261 // Int Immediate non-negative 3262 operand immU31() 3263 %{ 3264 predicate(n->get_int() >= 0); 3265 match(ConI); 3266 3267 op_cost(0); 3268 format %{ %} 3269 interface(CONST_INTER); 3270 %} 3271 3272 // Constant for long shifts 3273 operand immI_32() %{ 3274 predicate( n->get_int() == 32 ); 3275 match(ConI); 3276 3277 op_cost(0); 3278 format %{ %} 3279 interface(CONST_INTER); 3280 %} 3281 3282 operand immI_1_31() %{ 3283 predicate( n->get_int() >= 1 && n->get_int() <= 31 ); 3284 match(ConI); 3285 3286 op_cost(0); 3287 format %{ %} 3288 interface(CONST_INTER); 3289 %} 3290 3291 operand immI_32_63() %{ 3292 predicate( n->get_int() >= 32 && n->get_int() <= 63 ); 3293 match(ConI); 3294 op_cost(0); 3295 3296 format %{ %} 3297 interface(CONST_INTER); 3298 %} 3299 3300 operand immI_2() %{ 3301 predicate( n->get_int() == 2 ); 3302 match(ConI); 3303 3304 op_cost(0); 3305 format %{ %} 3306 interface(CONST_INTER); 3307 %} 3308 3309 operand immI_3() %{ 3310 predicate( n->get_int() == 3 ); 3311 match(ConI); 3312 3313 op_cost(0); 3314 format %{ %} 3315 interface(CONST_INTER); 3316 %} 3317 3318 operand immI_4() 3319 %{ 3320 predicate(n->get_int() == 4); 3321 match(ConI); 3322 3323 op_cost(0); 3324 format %{ %} 3325 interface(CONST_INTER); 3326 %} 3327 3328 operand immI_8() 3329 %{ 3330 predicate(n->get_int() == 8); 3331 match(ConI); 3332 3333 op_cost(0); 3334 format %{ %} 3335 interface(CONST_INTER); 3336 %} 3337 3338 // Pointer Immediate 3339 operand immP() %{ 3340 match(ConP); 3341 3342 op_cost(10); 3343 format %{ %} 
3344 interface(CONST_INTER); 3345 %} 3346 3347 // Null Pointer Immediate 3348 operand immP0() %{ 3349 predicate( n->get_ptr() == 0 ); 3350 match(ConP); 3351 op_cost(0); 3352 3353 format %{ %} 3354 interface(CONST_INTER); 3355 %} 3356 3357 // Long Immediate 3358 operand immL() %{ 3359 match(ConL); 3360 3361 op_cost(20); 3362 format %{ %} 3363 interface(CONST_INTER); 3364 %} 3365 3366 // Long Immediate zero 3367 operand immL0() %{ 3368 predicate( n->get_long() == 0L ); 3369 match(ConL); 3370 op_cost(0); 3371 3372 format %{ %} 3373 interface(CONST_INTER); 3374 %} 3375 3376 // Long Immediate zero 3377 operand immL_M1() %{ 3378 predicate( n->get_long() == -1L ); 3379 match(ConL); 3380 op_cost(0); 3381 3382 format %{ %} 3383 interface(CONST_INTER); 3384 %} 3385 3386 // Long immediate from 0 to 127. 3387 // Used for a shorter form of long mul by 10. 3388 operand immL_127() %{ 3389 predicate((0 <= n->get_long()) && (n->get_long() <= 127)); 3390 match(ConL); 3391 op_cost(0); 3392 3393 format %{ %} 3394 interface(CONST_INTER); 3395 %} 3396 3397 // Long Immediate: low 32-bit mask 3398 operand immL_32bits() %{ 3399 predicate(n->get_long() == 0xFFFFFFFFL); 3400 match(ConL); 3401 op_cost(0); 3402 3403 format %{ %} 3404 interface(CONST_INTER); 3405 %} 3406 3407 // Long Immediate: low 32-bit mask 3408 operand immL32() %{ 3409 predicate(n->get_long() == (int)(n->get_long())); 3410 match(ConL); 3411 op_cost(20); 3412 3413 format %{ %} 3414 interface(CONST_INTER); 3415 %} 3416 3417 //Double Immediate zero 3418 operand immDPR0() %{ 3419 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3420 // bug that generates code such that NaNs compare equal to 0.0 3421 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); 3422 match(ConD); 3423 3424 op_cost(5); 3425 format %{ %} 3426 interface(CONST_INTER); 3427 %} 3428 3429 // Double Immediate one 3430 operand immDPR1() %{ 3431 predicate( UseSSE<=1 && n->getd() == 1.0 ); 3432 match(ConD); 3433 3434 
op_cost(5); 3435 format %{ %} 3436 interface(CONST_INTER); 3437 %} 3438 3439 // Double Immediate 3440 operand immDPR() %{ 3441 predicate(UseSSE<=1); 3442 match(ConD); 3443 3444 op_cost(5); 3445 format %{ %} 3446 interface(CONST_INTER); 3447 %} 3448 3449 operand immD() %{ 3450 predicate(UseSSE>=2); 3451 match(ConD); 3452 3453 op_cost(5); 3454 format %{ %} 3455 interface(CONST_INTER); 3456 %} 3457 3458 // Double Immediate zero 3459 operand immD0() %{ 3460 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3461 // bug that generates code such that NaNs compare equal to 0.0 AND do not 3462 // compare equal to -0.0. 3463 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); 3464 match(ConD); 3465 3466 format %{ %} 3467 interface(CONST_INTER); 3468 %} 3469 3470 // Float Immediate zero 3471 operand immFPR0() %{ 3472 predicate(UseSSE == 0 && n->getf() == 0.0F); 3473 match(ConF); 3474 3475 op_cost(5); 3476 format %{ %} 3477 interface(CONST_INTER); 3478 %} 3479 3480 // Float Immediate one 3481 operand immFPR1() %{ 3482 predicate(UseSSE == 0 && n->getf() == 1.0F); 3483 match(ConF); 3484 3485 op_cost(5); 3486 format %{ %} 3487 interface(CONST_INTER); 3488 %} 3489 3490 // Float Immediate 3491 operand immFPR() %{ 3492 predicate( UseSSE == 0 ); 3493 match(ConF); 3494 3495 op_cost(5); 3496 format %{ %} 3497 interface(CONST_INTER); 3498 %} 3499 3500 // Float Immediate 3501 operand immF() %{ 3502 predicate(UseSSE >= 1); 3503 match(ConF); 3504 3505 op_cost(5); 3506 format %{ %} 3507 interface(CONST_INTER); 3508 %} 3509 3510 // Float Immediate zero. 
Zero and not -0.0 3511 operand immF0() %{ 3512 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); 3513 match(ConF); 3514 3515 op_cost(5); 3516 format %{ %} 3517 interface(CONST_INTER); 3518 %} 3519 3520 // Immediates for special shifts (sign extend) 3521 3522 // Constants for increment 3523 operand immI_16() %{ 3524 predicate( n->get_int() == 16 ); 3525 match(ConI); 3526 3527 format %{ %} 3528 interface(CONST_INTER); 3529 %} 3530 3531 operand immI_24() %{ 3532 predicate( n->get_int() == 24 ); 3533 match(ConI); 3534 3535 format %{ %} 3536 interface(CONST_INTER); 3537 %} 3538 3539 // Constant for byte-wide masking 3540 operand immI_255() %{ 3541 predicate( n->get_int() == 255 ); 3542 match(ConI); 3543 3544 format %{ %} 3545 interface(CONST_INTER); 3546 %} 3547 3548 // Constant for short-wide masking 3549 operand immI_65535() %{ 3550 predicate(n->get_int() == 65535); 3551 match(ConI); 3552 3553 format %{ %} 3554 interface(CONST_INTER); 3555 %} 3556 3557 operand kReg() 3558 %{ 3559 constraint(ALLOC_IN_RC(vectmask_reg)); 3560 match(RegVectMask); 3561 format %{%} 3562 interface(REG_INTER); 3563 %} 3564 3565 // Register Operands 3566 // Integer Register 3567 operand rRegI() %{ 3568 constraint(ALLOC_IN_RC(int_reg)); 3569 match(RegI); 3570 match(xRegI); 3571 match(eAXRegI); 3572 match(eBXRegI); 3573 match(eCXRegI); 3574 match(eDXRegI); 3575 match(eDIRegI); 3576 match(eSIRegI); 3577 3578 format %{ %} 3579 interface(REG_INTER); 3580 %} 3581 3582 // Subset of Integer Register 3583 operand xRegI(rRegI reg) %{ 3584 constraint(ALLOC_IN_RC(int_x_reg)); 3585 match(reg); 3586 match(eAXRegI); 3587 match(eBXRegI); 3588 match(eCXRegI); 3589 match(eDXRegI); 3590 3591 format %{ %} 3592 interface(REG_INTER); 3593 %} 3594 3595 // Special Registers 3596 operand eAXRegI(xRegI reg) %{ 3597 constraint(ALLOC_IN_RC(eax_reg)); 3598 match(reg); 3599 match(rRegI); 3600 3601 format %{ "EAX" %} 3602 interface(REG_INTER); 3603 %} 3604 3605 // Special Registers 3606 operand eBXRegI(xRegI reg) %{ 
3607 constraint(ALLOC_IN_RC(ebx_reg)); 3608 match(reg); 3609 match(rRegI); 3610 3611 format %{ "EBX" %} 3612 interface(REG_INTER); 3613 %} 3614 3615 operand eCXRegI(xRegI reg) %{ 3616 constraint(ALLOC_IN_RC(ecx_reg)); 3617 match(reg); 3618 match(rRegI); 3619 3620 format %{ "ECX" %} 3621 interface(REG_INTER); 3622 %} 3623 3624 operand eDXRegI(xRegI reg) %{ 3625 constraint(ALLOC_IN_RC(edx_reg)); 3626 match(reg); 3627 match(rRegI); 3628 3629 format %{ "EDX" %} 3630 interface(REG_INTER); 3631 %} 3632 3633 operand eDIRegI(xRegI reg) %{ 3634 constraint(ALLOC_IN_RC(edi_reg)); 3635 match(reg); 3636 match(rRegI); 3637 3638 format %{ "EDI" %} 3639 interface(REG_INTER); 3640 %} 3641 3642 operand nadxRegI() %{ 3643 constraint(ALLOC_IN_RC(nadx_reg)); 3644 match(RegI); 3645 match(eBXRegI); 3646 match(eCXRegI); 3647 match(eSIRegI); 3648 match(eDIRegI); 3649 3650 format %{ %} 3651 interface(REG_INTER); 3652 %} 3653 3654 operand ncxRegI() %{ 3655 constraint(ALLOC_IN_RC(ncx_reg)); 3656 match(RegI); 3657 match(eAXRegI); 3658 match(eDXRegI); 3659 match(eSIRegI); 3660 match(eDIRegI); 3661 3662 format %{ %} 3663 interface(REG_INTER); 3664 %} 3665 3666 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3667 // // 3668 operand eSIRegI(xRegI reg) %{ 3669 constraint(ALLOC_IN_RC(esi_reg)); 3670 match(reg); 3671 match(rRegI); 3672 3673 format %{ "ESI" %} 3674 interface(REG_INTER); 3675 %} 3676 3677 // Pointer Register 3678 operand anyRegP() %{ 3679 constraint(ALLOC_IN_RC(any_reg)); 3680 match(RegP); 3681 match(eAXRegP); 3682 match(eBXRegP); 3683 match(eCXRegP); 3684 match(eDIRegP); 3685 match(eRegP); 3686 3687 format %{ %} 3688 interface(REG_INTER); 3689 %} 3690 3691 operand eRegP() %{ 3692 constraint(ALLOC_IN_RC(int_reg)); 3693 match(RegP); 3694 match(eAXRegP); 3695 match(eBXRegP); 3696 match(eCXRegP); 3697 match(eDIRegP); 3698 3699 format %{ %} 3700 interface(REG_INTER); 3701 %} 3702 3703 operand rRegP() %{ 3704 constraint(ALLOC_IN_RC(int_reg)); 3705 match(RegP); 
3706 match(eAXRegP); 3707 match(eBXRegP); 3708 match(eCXRegP); 3709 match(eDIRegP); 3710 3711 format %{ %} 3712 interface(REG_INTER); 3713 %} 3714 3715 // On windows95, EBP is not safe to use for implicit null tests. 3716 operand eRegP_no_EBP() %{ 3717 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3718 match(RegP); 3719 match(eAXRegP); 3720 match(eBXRegP); 3721 match(eCXRegP); 3722 match(eDIRegP); 3723 3724 op_cost(100); 3725 format %{ %} 3726 interface(REG_INTER); 3727 %} 3728 3729 operand pRegP() %{ 3730 constraint(ALLOC_IN_RC(p_reg)); 3731 match(RegP); 3732 match(eBXRegP); 3733 match(eDXRegP); 3734 match(eSIRegP); 3735 match(eDIRegP); 3736 3737 format %{ %} 3738 interface(REG_INTER); 3739 %} 3740 3741 // Special Registers 3742 // Return a pointer value 3743 operand eAXRegP(eRegP reg) %{ 3744 constraint(ALLOC_IN_RC(eax_reg)); 3745 match(reg); 3746 format %{ "EAX" %} 3747 interface(REG_INTER); 3748 %} 3749 3750 // Used in AtomicAdd 3751 operand eBXRegP(eRegP reg) %{ 3752 constraint(ALLOC_IN_RC(ebx_reg)); 3753 match(reg); 3754 format %{ "EBX" %} 3755 interface(REG_INTER); 3756 %} 3757 3758 // Tail-call (interprocedural jump) to interpreter 3759 operand eCXRegP(eRegP reg) %{ 3760 constraint(ALLOC_IN_RC(ecx_reg)); 3761 match(reg); 3762 format %{ "ECX" %} 3763 interface(REG_INTER); 3764 %} 3765 3766 operand eDXRegP(eRegP reg) %{ 3767 constraint(ALLOC_IN_RC(edx_reg)); 3768 match(reg); 3769 format %{ "EDX" %} 3770 interface(REG_INTER); 3771 %} 3772 3773 operand eSIRegP(eRegP reg) %{ 3774 constraint(ALLOC_IN_RC(esi_reg)); 3775 match(reg); 3776 format %{ "ESI" %} 3777 interface(REG_INTER); 3778 %} 3779 3780 // Used in rep stosw 3781 operand eDIRegP(eRegP reg) %{ 3782 constraint(ALLOC_IN_RC(edi_reg)); 3783 match(reg); 3784 format %{ "EDI" %} 3785 interface(REG_INTER); 3786 %} 3787 3788 operand eRegL() %{ 3789 constraint(ALLOC_IN_RC(long_reg)); 3790 match(RegL); 3791 match(eADXRegL); 3792 3793 format %{ %} 3794 interface(REG_INTER); 3795 %} 3796 3797 operand eADXRegL( eRegL reg 
) %{ 3798 constraint(ALLOC_IN_RC(eadx_reg)); 3799 match(reg); 3800 3801 format %{ "EDX:EAX" %} 3802 interface(REG_INTER); 3803 %} 3804 3805 operand eBCXRegL( eRegL reg ) %{ 3806 constraint(ALLOC_IN_RC(ebcx_reg)); 3807 match(reg); 3808 3809 format %{ "EBX:ECX" %} 3810 interface(REG_INTER); 3811 %} 3812 3813 operand eBDPRegL( eRegL reg ) %{ 3814 constraint(ALLOC_IN_RC(ebpd_reg)); 3815 match(reg); 3816 3817 format %{ "EBP:EDI" %} 3818 interface(REG_INTER); 3819 %} 3820 // Special case for integer high multiply 3821 operand eADXRegL_low_only() %{ 3822 constraint(ALLOC_IN_RC(eadx_reg)); 3823 match(RegL); 3824 3825 format %{ "EAX" %} 3826 interface(REG_INTER); 3827 %} 3828 3829 // Flags register, used as output of compare instructions 3830 operand rFlagsReg() %{ 3831 constraint(ALLOC_IN_RC(int_flags)); 3832 match(RegFlags); 3833 3834 format %{ "EFLAGS" %} 3835 interface(REG_INTER); 3836 %} 3837 3838 // Flags register, used as output of compare instructions 3839 operand eFlagsReg() %{ 3840 constraint(ALLOC_IN_RC(int_flags)); 3841 match(RegFlags); 3842 3843 format %{ "EFLAGS" %} 3844 interface(REG_INTER); 3845 %} 3846 3847 // Flags register, used as output of FLOATING POINT compare instructions 3848 operand eFlagsRegU() %{ 3849 constraint(ALLOC_IN_RC(int_flags)); 3850 match(RegFlags); 3851 3852 format %{ "EFLAGS_U" %} 3853 interface(REG_INTER); 3854 %} 3855 3856 operand eFlagsRegUCF() %{ 3857 constraint(ALLOC_IN_RC(int_flags)); 3858 match(RegFlags); 3859 predicate(false); 3860 3861 format %{ "EFLAGS_U_CF" %} 3862 interface(REG_INTER); 3863 %} 3864 3865 // Condition Code Register used by long compare 3866 operand flagsReg_long_LTGE() %{ 3867 constraint(ALLOC_IN_RC(int_flags)); 3868 match(RegFlags); 3869 format %{ "FLAGS_LTGE" %} 3870 interface(REG_INTER); 3871 %} 3872 operand flagsReg_long_EQNE() %{ 3873 constraint(ALLOC_IN_RC(int_flags)); 3874 match(RegFlags); 3875 format %{ "FLAGS_EQNE" %} 3876 interface(REG_INTER); 3877 %} 3878 operand flagsReg_long_LEGT() %{ 3879 
constraint(ALLOC_IN_RC(int_flags)); 3880 match(RegFlags); 3881 format %{ "FLAGS_LEGT" %} 3882 interface(REG_INTER); 3883 %} 3884 3885 // Condition Code Register used by unsigned long compare 3886 operand flagsReg_ulong_LTGE() %{ 3887 constraint(ALLOC_IN_RC(int_flags)); 3888 match(RegFlags); 3889 format %{ "FLAGS_U_LTGE" %} 3890 interface(REG_INTER); 3891 %} 3892 operand flagsReg_ulong_EQNE() %{ 3893 constraint(ALLOC_IN_RC(int_flags)); 3894 match(RegFlags); 3895 format %{ "FLAGS_U_EQNE" %} 3896 interface(REG_INTER); 3897 %} 3898 operand flagsReg_ulong_LEGT() %{ 3899 constraint(ALLOC_IN_RC(int_flags)); 3900 match(RegFlags); 3901 format %{ "FLAGS_U_LEGT" %} 3902 interface(REG_INTER); 3903 %} 3904 3905 // Float register operands 3906 operand regDPR() %{ 3907 predicate( UseSSE < 2 ); 3908 constraint(ALLOC_IN_RC(fp_dbl_reg)); 3909 match(RegD); 3910 match(regDPR1); 3911 match(regDPR2); 3912 format %{ %} 3913 interface(REG_INTER); 3914 %} 3915 3916 operand regDPR1(regDPR reg) %{ 3917 predicate( UseSSE < 2 ); 3918 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 3919 match(reg); 3920 format %{ "FPR1" %} 3921 interface(REG_INTER); 3922 %} 3923 3924 operand regDPR2(regDPR reg) %{ 3925 predicate( UseSSE < 2 ); 3926 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 3927 match(reg); 3928 format %{ "FPR2" %} 3929 interface(REG_INTER); 3930 %} 3931 3932 operand regnotDPR1(regDPR reg) %{ 3933 predicate( UseSSE < 2 ); 3934 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 3935 match(reg); 3936 format %{ %} 3937 interface(REG_INTER); 3938 %} 3939 3940 // Float register operands 3941 operand regFPR() %{ 3942 predicate( UseSSE < 2 ); 3943 constraint(ALLOC_IN_RC(fp_flt_reg)); 3944 match(RegF); 3945 match(regFPR1); 3946 format %{ %} 3947 interface(REG_INTER); 3948 %} 3949 3950 // Float register operands 3951 operand regFPR1(regFPR reg) %{ 3952 predicate( UseSSE < 2 ); 3953 constraint(ALLOC_IN_RC(fp_flt_reg0)); 3954 match(reg); 3955 format %{ "FPR1" %} 3956 interface(REG_INTER); 3957 %} 3958 3959 // XMM Float register 
operands 3960 operand regF() %{ 3961 predicate( UseSSE>=1 ); 3962 constraint(ALLOC_IN_RC(float_reg_legacy)); 3963 match(RegF); 3964 format %{ %} 3965 interface(REG_INTER); 3966 %} 3967 3968 operand legRegF() %{ 3969 predicate( UseSSE>=1 ); 3970 constraint(ALLOC_IN_RC(float_reg_legacy)); 3971 match(RegF); 3972 format %{ %} 3973 interface(REG_INTER); 3974 %} 3975 3976 // Float register operands 3977 operand vlRegF() %{ 3978 constraint(ALLOC_IN_RC(float_reg_vl)); 3979 match(RegF); 3980 3981 format %{ %} 3982 interface(REG_INTER); 3983 %} 3984 3985 // XMM Double register operands 3986 operand regD() %{ 3987 predicate( UseSSE>=2 ); 3988 constraint(ALLOC_IN_RC(double_reg_legacy)); 3989 match(RegD); 3990 format %{ %} 3991 interface(REG_INTER); 3992 %} 3993 3994 // Double register operands 3995 operand legRegD() %{ 3996 predicate( UseSSE>=2 ); 3997 constraint(ALLOC_IN_RC(double_reg_legacy)); 3998 match(RegD); 3999 format %{ %} 4000 interface(REG_INTER); 4001 %} 4002 4003 operand vlRegD() %{ 4004 constraint(ALLOC_IN_RC(double_reg_vl)); 4005 match(RegD); 4006 4007 format %{ %} 4008 interface(REG_INTER); 4009 %} 4010 4011 //----------Memory Operands---------------------------------------------------- 4012 // Direct Memory Operand 4013 operand direct(immP addr) %{ 4014 match(addr); 4015 4016 format %{ "[$addr]" %} 4017 interface(MEMORY_INTER) %{ 4018 base(0xFFFFFFFF); 4019 index(0x4); 4020 scale(0x0); 4021 disp($addr); 4022 %} 4023 %} 4024 4025 // Indirect Memory Operand 4026 operand indirect(eRegP reg) %{ 4027 constraint(ALLOC_IN_RC(int_reg)); 4028 match(reg); 4029 4030 format %{ "[$reg]" %} 4031 interface(MEMORY_INTER) %{ 4032 base($reg); 4033 index(0x4); 4034 scale(0x0); 4035 disp(0x0); 4036 %} 4037 %} 4038 4039 // Indirect Memory Plus Short Offset Operand 4040 operand indOffset8(eRegP reg, immI8 off) %{ 4041 match(AddP reg off); 4042 4043 format %{ "[$reg + $off]" %} 4044 interface(MEMORY_INTER) %{ 4045 base($reg); 4046 index(0x4); 4047 scale(0x0); 4048 disp($off); 4049 %} 
4050 %} 4051 4052 // Indirect Memory Plus Long Offset Operand 4053 operand indOffset32(eRegP reg, immI off) %{ 4054 match(AddP reg off); 4055 4056 format %{ "[$reg + $off]" %} 4057 interface(MEMORY_INTER) %{ 4058 base($reg); 4059 index(0x4); 4060 scale(0x0); 4061 disp($off); 4062 %} 4063 %} 4064 4065 // Indirect Memory Plus Long Offset Operand 4066 operand indOffset32X(rRegI reg, immP off) %{ 4067 match(AddP off reg); 4068 4069 format %{ "[$reg + $off]" %} 4070 interface(MEMORY_INTER) %{ 4071 base($reg); 4072 index(0x4); 4073 scale(0x0); 4074 disp($off); 4075 %} 4076 %} 4077 4078 // Indirect Memory Plus Index Register Plus Offset Operand 4079 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4080 match(AddP (AddP reg ireg) off); 4081 4082 op_cost(10); 4083 format %{"[$reg + $off + $ireg]" %} 4084 interface(MEMORY_INTER) %{ 4085 base($reg); 4086 index($ireg); 4087 scale(0x0); 4088 disp($off); 4089 %} 4090 %} 4091 4092 // Indirect Memory Plus Index Register Plus Offset Operand 4093 operand indIndex(eRegP reg, rRegI ireg) %{ 4094 match(AddP reg ireg); 4095 4096 op_cost(10); 4097 format %{"[$reg + $ireg]" %} 4098 interface(MEMORY_INTER) %{ 4099 base($reg); 4100 index($ireg); 4101 scale(0x0); 4102 disp(0x0); 4103 %} 4104 %} 4105 4106 // // ------------------------------------------------------------------------- 4107 // // 486 architecture doesn't support "scale * index + offset" with out a base 4108 // // ------------------------------------------------------------------------- 4109 // // Scaled Memory Operands 4110 // // Indirect Memory Times Scale Plus Offset Operand 4111 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4112 // match(AddP off (LShiftI ireg scale)); 4113 // 4114 // op_cost(10); 4115 // format %{"[$off + $ireg << $scale]" %} 4116 // interface(MEMORY_INTER) %{ 4117 // base(0x4); 4118 // index($ireg); 4119 // scale($scale); 4120 // disp($off); 4121 // %} 4122 // %} 4123 4124 // Indirect Memory Times Scale Plus Index Register 4125 
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);     // no displacement in this form
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// Matches the full [base + disp + index*2^scale] x86 addressing form.
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long. If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address. Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.
// load-long support
// Address register for the load-long idiom; pinned to ESI so the address
// cannot overlap the load-long destination pair (see the note above).
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);    // discourage general use; only intended for load-long
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);    // 0x4 encodes "no index register"
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);    // no index register
    scale(0x0);
    disp($off);
  %}
%}

// Memory forms usable by the load-long idiom (address register is ESI-only).
opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
// Pointer-sized stack slot.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Integer stack slot.
operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Float stack slot.
operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Double stack slot.
operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Long stack slot.
operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
// Signed condition-code encodings ("l"/"ge"/"le"/"g" mnemonics).
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare. Used by FP also, with
// C2 (unordered) turned into GT or LT already. The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case.
// Restricted to lt/ge/le/gt tests; uses the same unsigned encodings as cmpOpU.
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps.
// Restricted to eq/ne tests, which need the unordered case handled separately.
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move.
// Encodings here are FCMOVcc opcode bytes, not Jcc condition nibbles.
// overflow/no_overflow tests are excluded by the predicate below.
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares.
// NOTE: the signed codes are deliberately mirrored relative to cmpOp
// (less encodes "g", greater encodes "l", etc.) for use where the
// comparison operands have been commuted.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares.
// Unsigned counterpart of cmpOp_commute: codes mirrored relative to cmpOpU.
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format. The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
// Note: indOffset32X (pointer-immediate displacement) is excluded here.
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable-sized (x86)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // Instruction sizes are measured in units of 1 byte
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention:  ialu or fpu
// Then:               _reg
// Then:               _reg if there is a 2nd register
// Then:               _long if it's a pair of instructions implementing a long
// Then:               _fat if it requires the big decoder
//   Or:               _mem if it requires the big decoder and a memory unit.
4467 4468 // Integer ALU reg operation 4469 pipe_class ialu_reg(rRegI dst) %{ 4470 single_instruction; 4471 dst : S4(write); 4472 dst : S3(read); 4473 DECODE : S0; // any decoder 4474 ALU : S3; // any alu 4475 %} 4476 4477 // Long ALU reg operation 4478 pipe_class ialu_reg_long(eRegL dst) %{ 4479 instruction_count(2); 4480 dst : S4(write); 4481 dst : S3(read); 4482 DECODE : S0(2); // any 2 decoders 4483 ALU : S3(2); // both alus 4484 %} 4485 4486 // Integer ALU reg operation using big decoder 4487 pipe_class ialu_reg_fat(rRegI dst) %{ 4488 single_instruction; 4489 dst : S4(write); 4490 dst : S3(read); 4491 D0 : S0; // big decoder only 4492 ALU : S3; // any alu 4493 %} 4494 4495 // Long ALU reg operation using big decoder 4496 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4497 instruction_count(2); 4498 dst : S4(write); 4499 dst : S3(read); 4500 D0 : S0(2); // big decoder only; twice 4501 ALU : S3(2); // any 2 alus 4502 %} 4503 4504 // Integer ALU reg-reg operation 4505 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4506 single_instruction; 4507 dst : S4(write); 4508 src : S3(read); 4509 DECODE : S0; // any decoder 4510 ALU : S3; // any alu 4511 %} 4512 4513 // Long ALU reg-reg operation 4514 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4515 instruction_count(2); 4516 dst : S4(write); 4517 src : S3(read); 4518 DECODE : S0(2); // any 2 decoders 4519 ALU : S3(2); // both alus 4520 %} 4521 4522 // Integer ALU reg-reg operation 4523 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4524 single_instruction; 4525 dst : S4(write); 4526 src : S3(read); 4527 D0 : S0; // big decoder only 4528 ALU : S3; // any alu 4529 %} 4530 4531 // Long ALU reg-reg operation 4532 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4533 instruction_count(2); 4534 dst : S4(write); 4535 src : S3(read); 4536 D0 : S0(2); // big decoder only; twice 4537 ALU : S3(2); // both alus 4538 %} 4539 4540 // Integer ALU reg-mem operation 4541 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4542 single_instruction; 4543 dst : S5(write); 4544 mem : S3(read); 4545 D0 : S0; // big decoder only 4546 ALU : S4; // any alu 4547 MEM : S3; // any mem 4548 %} 4549 4550 // Long ALU reg-mem operation 4551 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4552 instruction_count(2); 4553 dst : S5(write); 4554 mem : S3(read); 4555 D0 : S0(2); // big decoder only; twice 4556 ALU : S4(2); // any 2 alus 4557 MEM : S3(2); // both mems 4558 %} 4559 4560 // Integer mem operation (prefetch) 4561 pipe_class ialu_mem(memory mem) 4562 %{ 4563 single_instruction; 4564 mem : S3(read); 4565 D0 : S0; // big decoder only 4566 MEM : S3; // any mem 4567 %} 4568 4569 // Integer Store to Memory 4570 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4571 single_instruction; 4572 mem : S3(read); 4573 src : S5(read); 4574 D0 : S0; // big decoder only 4575 ALU : S4; // any alu 4576 MEM : S3; 4577 %} 4578 4579 // Long Store to Memory 4580 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4581 instruction_count(2); 4582 mem : S3(read); 4583 src : S5(read); 4584 D0 : S0(2); // big decoder only; twice 4585 ALU : S4(2); // any 2 alus 4586 MEM : S3(2); // Both mems 4587 %} 4588 4589 // Integer Store to Memory 4590 pipe_class ialu_mem_imm(memory mem) %{ 4591 single_instruction; 4592 mem : S3(read); 4593 D0 : S0; // big decoder only 4594 ALU : S4; // any alu 4595 MEM : S3; 4596 %} 4597 4598 // Integer ALU0 reg-reg operation 4599 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4600 single_instruction; 4601 dst : S4(write); 4602 src : S3(read); 4603 D0 : S0; // Big decoder only 4604 ALU0 : S3; // only alu0 4605 %} 4606 4607 // Integer ALU0 reg-mem operation 4608 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4609 single_instruction; 4610 dst : S5(write); 4611 mem : S3(read); 4612 D0 : S0; // big decoder only 4613 ALU0 : S4; // ALU0 only 4614 MEM : S3; // any mem 4615 %} 4616 4617 // Integer ALU reg-reg operation 4618 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4619 single_instruction; 4620 cr : S4(write); 4621 src1 : S3(read); 4622 src2 : S3(read); 4623 DECODE : S0; // any decoder 4624 ALU : S3; // any alu 4625 %} 4626 4627 // Integer ALU reg-imm operation 4628 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4629 single_instruction; 4630 cr : S4(write); 4631 src1 : S3(read); 4632 DECODE : S0; // any decoder 4633 ALU : S3; // any alu 4634 %} 4635 4636 // Integer ALU reg-mem operation 4637 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4638 single_instruction; 4639 cr : S4(write); 4640 src1 : S3(read); 4641 src2 : S3(read); 4642 D0 : S0; // big decoder only 4643 ALU : S4; // any alu 4644 MEM : S3; 4645 %} 4646 4647 // Conditional move reg-reg 4648 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4649 instruction_count(4); 4650 y : S4(read); 4651 q : S3(read); 4652 p : S3(read); 4653 DECODE : S0(4); // any decoder 4654 %} 4655 4656 // Conditional move reg-reg 4657 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4658 single_instruction; 4659 dst : S4(write); 4660 src : S3(read); 4661 cr : S3(read); 4662 DECODE : S0; // any decoder 4663 %} 4664 4665 // Conditional move reg-mem 4666 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4667 single_instruction; 4668 dst : S4(write); 4669 src : S3(read); 4670 cr : S3(read); 4671 DECODE : S0; // any decoder 4672 MEM : S3; 4673 %} 4674 4675 // Conditional move reg-reg long 4676 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4677 single_instruction; 4678 dst : S4(write); 4679 src : S3(read); 4680 cr : S3(read); 4681 DECODE : S0(2); // any 2 decoders 4682 %} 4683 4684 // Conditional move double reg-reg 4685 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4686 single_instruction; 4687 dst : S4(write); 4688 src : S3(read); 4689 cr : S3(read); 4690 DECODE : S0; // any decoder 4691 %} 4692 4693 // Float reg-reg operation 4694 pipe_class fpu_reg(regDPR 
dst) %{ 4695 instruction_count(2); 4696 dst : S3(read); 4697 DECODE : S0(2); // any 2 decoders 4698 FPU : S3; 4699 %} 4700 4701 // Float reg-reg operation 4702 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 4703 instruction_count(2); 4704 dst : S4(write); 4705 src : S3(read); 4706 DECODE : S0(2); // any 2 decoders 4707 FPU : S3; 4708 %} 4709 4710 // Float reg-reg operation 4711 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 4712 instruction_count(3); 4713 dst : S4(write); 4714 src1 : S3(read); 4715 src2 : S3(read); 4716 DECODE : S0(3); // any 3 decoders 4717 FPU : S3(2); 4718 %} 4719 4720 // Float reg-reg operation 4721 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 4722 instruction_count(4); 4723 dst : S4(write); 4724 src1 : S3(read); 4725 src2 : S3(read); 4726 src3 : S3(read); 4727 DECODE : S0(4); // any 3 decoders 4728 FPU : S3(2); 4729 %} 4730 4731 // Float reg-reg operation 4732 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 4733 instruction_count(4); 4734 dst : S4(write); 4735 src1 : S3(read); 4736 src2 : S3(read); 4737 src3 : S3(read); 4738 DECODE : S1(3); // any 3 decoders 4739 D0 : S0; // Big decoder only 4740 FPU : S3(2); 4741 MEM : S3; 4742 %} 4743 4744 // Float reg-mem operation 4745 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 4746 instruction_count(2); 4747 dst : S5(write); 4748 mem : S3(read); 4749 D0 : S0; // big decoder only 4750 DECODE : S1; // any decoder for FPU POP 4751 FPU : S4; 4752 MEM : S3; // any mem 4753 %} 4754 4755 // Float reg-mem operation 4756 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 4757 instruction_count(3); 4758 dst : S5(write); 4759 src1 : S3(read); 4760 mem : S3(read); 4761 D0 : S0; // big decoder only 4762 DECODE : S1(2); // any decoder for FPU POP 4763 FPU : S4; 4764 MEM : S3; // any mem 4765 %} 4766 4767 // Float mem-reg operation 4768 pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 4769 
instruction_count(2); 4770 src : S5(read); 4771 mem : S3(read); 4772 DECODE : S0; // any decoder for FPU PUSH 4773 D0 : S1; // big decoder only 4774 FPU : S4; 4775 MEM : S3; // any mem 4776 %} 4777 4778 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4779 instruction_count(3); 4780 src1 : S3(read); 4781 src2 : S3(read); 4782 mem : S3(read); 4783 DECODE : S0(2); // any decoder for FPU PUSH 4784 D0 : S1; // big decoder only 4785 FPU : S4; 4786 MEM : S3; // any mem 4787 %} 4788 4789 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4790 instruction_count(3); 4791 src1 : S3(read); 4792 src2 : S3(read); 4793 mem : S4(read); 4794 DECODE : S0; // any decoder for FPU PUSH 4795 D0 : S0(2); // big decoder only 4796 FPU : S4; 4797 MEM : S3(2); // any mem 4798 %} 4799 4800 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4801 instruction_count(2); 4802 src1 : S3(read); 4803 dst : S4(read); 4804 D0 : S0(2); // big decoder only 4805 MEM : S3(2); // any mem 4806 %} 4807 4808 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4809 instruction_count(3); 4810 src1 : S3(read); 4811 src2 : S3(read); 4812 dst : S4(read); 4813 D0 : S0(3); // big decoder only 4814 FPU : S4; 4815 MEM : S3(3); // any mem 4816 %} 4817 4818 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4819 instruction_count(3); 4820 src1 : S4(read); 4821 mem : S4(read); 4822 DECODE : S0; // any decoder for FPU PUSH 4823 D0 : S0(2); // big decoder only 4824 FPU : S4; 4825 MEM : S3(2); // any mem 4826 %} 4827 4828 // Float load constant 4829 pipe_class fpu_reg_con(regDPR dst) %{ 4830 instruction_count(2); 4831 dst : S5(write); 4832 D0 : S0; // big decoder only for the load 4833 DECODE : S1; // any decoder for FPU POP 4834 FPU : S4; 4835 MEM : S3; // any mem 4836 %} 4837 4838 // Float load constant 4839 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 4840 instruction_count(3); 4841 dst : S5(write); 4842 src : S3(read); 4843 D0 : S0; // big decoder only for 
the load 4844 DECODE : S1(2); // any decoder for FPU POP 4845 FPU : S4; 4846 MEM : S3; // any mem 4847 %} 4848 4849 // UnConditional branch 4850 pipe_class pipe_jmp( label labl ) %{ 4851 single_instruction; 4852 BR : S3; 4853 %} 4854 4855 // Conditional branch 4856 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 4857 single_instruction; 4858 cr : S1(read); 4859 BR : S3; 4860 %} 4861 4862 // Allocation idiom 4863 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 4864 instruction_count(1); force_serialization; 4865 fixed_latency(6); 4866 heap_ptr : S3(read); 4867 DECODE : S0(3); 4868 D0 : S2; 4869 MEM : S3; 4870 ALU : S3(2); 4871 dst : S5(write); 4872 BR : S5; 4873 %} 4874 4875 // Generic big/slow expanded idiom 4876 pipe_class pipe_slow( ) %{ 4877 instruction_count(10); multiple_bundles; force_serialization; 4878 fixed_latency(100); 4879 D0 : S0(2); 4880 MEM : S3(2); 4881 %} 4882 4883 // The real do-nothing guy 4884 pipe_class empty( ) %{ 4885 instruction_count(0); 4886 %} 4887 4888 // Define the class for the Nop node 4889 define %{ 4890 MachNop = empty; 4891 %} 4892 4893 %} 4894 4895 //----------INSTRUCTIONS------------------------------------------------------- 4896 // 4897 // match -- States which machine-independent subtree may be replaced 4898 // by this instruction. 4899 // ins_cost -- The estimated cost of this instruction is used by instruction 4900 // selection to identify a minimum cost tree of machine 4901 // instructions that matches a tree of machine-independent 4902 // instructions. 4903 // format -- A string providing the disassembly for this instruction. 4904 // The value of an instruction's operand may be inserted 4905 // by referring to it with a '$' prefix. 4906 // opcode -- Three instruction opcodes may be provided. These are referred 4907 // to within an encode class as $primary, $secondary, and $tertiary 4908 // respectively. 
The primary opcode is commonly used to 4909 // indicate the type of machine instruction, while secondary 4910 // and tertiary are often used for prefix options or addressing 4911 // modes. 4912 // ins_encode -- A list of encode classes with parameters. The encode class 4913 // name must have been defined in an 'enc_class' specification 4914 // in the encode section of the architecture description. 4915 4916 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 4917 // Load Float 4918 instruct MoveF2LEG(legRegF dst, regF src) %{ 4919 match(Set dst src); 4920 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} 4921 ins_encode %{ 4922 ShouldNotReachHere(); 4923 %} 4924 ins_pipe( fpu_reg_reg ); 4925 %} 4926 4927 // Load Float 4928 instruct MoveLEG2F(regF dst, legRegF src) %{ 4929 match(Set dst src); 4930 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} 4931 ins_encode %{ 4932 ShouldNotReachHere(); 4933 %} 4934 ins_pipe( fpu_reg_reg ); 4935 %} 4936 4937 // Load Float 4938 instruct MoveF2VL(vlRegF dst, regF src) %{ 4939 match(Set dst src); 4940 format %{ "movss $dst,$src\t! load float (4 bytes)" %} 4941 ins_encode %{ 4942 ShouldNotReachHere(); 4943 %} 4944 ins_pipe( fpu_reg_reg ); 4945 %} 4946 4947 // Load Float 4948 instruct MoveVL2F(regF dst, vlRegF src) %{ 4949 match(Set dst src); 4950 format %{ "movss $dst,$src\t! 
load float (4 bytes)" %} 4951 ins_encode %{ 4952 ShouldNotReachHere(); 4953 %} 4954 ins_pipe( fpu_reg_reg ); 4955 %} 4956 4957 4958 4959 // Load Double 4960 instruct MoveD2LEG(legRegD dst, regD src) %{ 4961 match(Set dst src); 4962 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} 4963 ins_encode %{ 4964 ShouldNotReachHere(); 4965 %} 4966 ins_pipe( fpu_reg_reg ); 4967 %} 4968 4969 // Load Double 4970 instruct MoveLEG2D(regD dst, legRegD src) %{ 4971 match(Set dst src); 4972 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} 4973 ins_encode %{ 4974 ShouldNotReachHere(); 4975 %} 4976 ins_pipe( fpu_reg_reg ); 4977 %} 4978 4979 // Load Double 4980 instruct MoveD2VL(vlRegD dst, regD src) %{ 4981 match(Set dst src); 4982 format %{ "movsd $dst,$src\t! load double (8 bytes)" %} 4983 ins_encode %{ 4984 ShouldNotReachHere(); 4985 %} 4986 ins_pipe( fpu_reg_reg ); 4987 %} 4988 4989 // Load Double 4990 instruct MoveVL2D(regD dst, vlRegD src) %{ 4991 match(Set dst src); 4992 format %{ "movsd $dst,$src\t! 
load double (8 bytes)" %} 4993 ins_encode %{ 4994 ShouldNotReachHere(); 4995 %} 4996 ins_pipe( fpu_reg_reg ); 4997 %} 4998 4999 //----------BSWAP-Instruction-------------------------------------------------- 5000 instruct bytes_reverse_int(rRegI dst) %{ 5001 match(Set dst (ReverseBytesI dst)); 5002 5003 format %{ "BSWAP $dst" %} 5004 opcode(0x0F, 0xC8); 5005 ins_encode( OpcP, OpcSReg(dst) ); 5006 ins_pipe( ialu_reg ); 5007 %} 5008 5009 instruct bytes_reverse_long(eRegL dst) %{ 5010 match(Set dst (ReverseBytesL dst)); 5011 5012 format %{ "BSWAP $dst.lo\n\t" 5013 "BSWAP $dst.hi\n\t" 5014 "XCHG $dst.lo $dst.hi" %} 5015 5016 ins_cost(125); 5017 ins_encode( bswap_long_bytes(dst) ); 5018 ins_pipe( ialu_reg_reg); 5019 %} 5020 5021 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5022 match(Set dst (ReverseBytesUS dst)); 5023 effect(KILL cr); 5024 5025 format %{ "BSWAP $dst\n\t" 5026 "SHR $dst,16\n\t" %} 5027 ins_encode %{ 5028 __ bswapl($dst$$Register); 5029 __ shrl($dst$$Register, 16); 5030 %} 5031 ins_pipe( ialu_reg ); 5032 %} 5033 5034 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5035 match(Set dst (ReverseBytesS dst)); 5036 effect(KILL cr); 5037 5038 format %{ "BSWAP $dst\n\t" 5039 "SAR $dst,16\n\t" %} 5040 ins_encode %{ 5041 __ bswapl($dst$$Register); 5042 __ sarl($dst$$Register, 16); 5043 %} 5044 ins_pipe( ialu_reg ); 5045 %} 5046 5047 5048 //---------- Zeros Count Instructions ------------------------------------------ 5049 5050 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5051 predicate(UseCountLeadingZerosInstruction); 5052 match(Set dst (CountLeadingZerosI src)); 5053 effect(KILL cr); 5054 5055 format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %} 5056 ins_encode %{ 5057 __ lzcntl($dst$$Register, $src$$Register); 5058 %} 5059 ins_pipe(ialu_reg); 5060 %} 5061 5062 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5063 predicate(!UseCountLeadingZerosInstruction); 5064 match(Set dst 
(CountLeadingZerosI src)); 5065 effect(KILL cr); 5066 5067 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5068 "JNZ skip\n\t" 5069 "MOV $dst, -1\n" 5070 "skip:\n\t" 5071 "NEG $dst\n\t" 5072 "ADD $dst, 31" %} 5073 ins_encode %{ 5074 Register Rdst = $dst$$Register; 5075 Register Rsrc = $src$$Register; 5076 Label skip; 5077 __ bsrl(Rdst, Rsrc); 5078 __ jccb(Assembler::notZero, skip); 5079 __ movl(Rdst, -1); 5080 __ bind(skip); 5081 __ negl(Rdst); 5082 __ addl(Rdst, BitsPerInt - 1); 5083 %} 5084 ins_pipe(ialu_reg); 5085 %} 5086 5087 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5088 predicate(UseCountLeadingZerosInstruction); 5089 match(Set dst (CountLeadingZerosL src)); 5090 effect(TEMP dst, KILL cr); 5091 5092 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5093 "JNC done\n\t" 5094 "LZCNT $dst, $src.lo\n\t" 5095 "ADD $dst, 32\n" 5096 "done:" %} 5097 ins_encode %{ 5098 Register Rdst = $dst$$Register; 5099 Register Rsrc = $src$$Register; 5100 Label done; 5101 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5102 __ jccb(Assembler::carryClear, done); 5103 __ lzcntl(Rdst, Rsrc); 5104 __ addl(Rdst, BitsPerInt); 5105 __ bind(done); 5106 %} 5107 ins_pipe(ialu_reg); 5108 %} 5109 5110 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5111 predicate(!UseCountLeadingZerosInstruction); 5112 match(Set dst (CountLeadingZerosL src)); 5113 effect(TEMP dst, KILL cr); 5114 5115 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5116 "JZ msw_is_zero\n\t" 5117 "ADD $dst, 32\n\t" 5118 "JMP not_zero\n" 5119 "msw_is_zero:\n\t" 5120 "BSR $dst, $src.lo\n\t" 5121 "JNZ not_zero\n\t" 5122 "MOV $dst, -1\n" 5123 "not_zero:\n\t" 5124 "NEG $dst\n\t" 5125 "ADD $dst, 63\n" %} 5126 ins_encode %{ 5127 Register Rdst = $dst$$Register; 5128 Register Rsrc = $src$$Register; 5129 Label msw_is_zero; 5130 Label not_zero; 5131 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5132 __ jccb(Assembler::zero, msw_is_zero); 5133 __ addl(Rdst, BitsPerInt); 
5134 __ jmpb(not_zero); 5135 __ bind(msw_is_zero); 5136 __ bsrl(Rdst, Rsrc); 5137 __ jccb(Assembler::notZero, not_zero); 5138 __ movl(Rdst, -1); 5139 __ bind(not_zero); 5140 __ negl(Rdst); 5141 __ addl(Rdst, BitsPerLong - 1); 5142 %} 5143 ins_pipe(ialu_reg); 5144 %} 5145 5146 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5147 predicate(UseCountTrailingZerosInstruction); 5148 match(Set dst (CountTrailingZerosI src)); 5149 effect(KILL cr); 5150 5151 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5152 ins_encode %{ 5153 __ tzcntl($dst$$Register, $src$$Register); 5154 %} 5155 ins_pipe(ialu_reg); 5156 %} 5157 5158 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5159 predicate(!UseCountTrailingZerosInstruction); 5160 match(Set dst (CountTrailingZerosI src)); 5161 effect(KILL cr); 5162 5163 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5164 "JNZ done\n\t" 5165 "MOV $dst, 32\n" 5166 "done:" %} 5167 ins_encode %{ 5168 Register Rdst = $dst$$Register; 5169 Label done; 5170 __ bsfl(Rdst, $src$$Register); 5171 __ jccb(Assembler::notZero, done); 5172 __ movl(Rdst, BitsPerInt); 5173 __ bind(done); 5174 %} 5175 ins_pipe(ialu_reg); 5176 %} 5177 5178 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5179 predicate(UseCountTrailingZerosInstruction); 5180 match(Set dst (CountTrailingZerosL src)); 5181 effect(TEMP dst, KILL cr); 5182 5183 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5184 "JNC done\n\t" 5185 "TZCNT $dst, $src.hi\n\t" 5186 "ADD $dst, 32\n" 5187 "done:" %} 5188 ins_encode %{ 5189 Register Rdst = $dst$$Register; 5190 Register Rsrc = $src$$Register; 5191 Label done; 5192 __ tzcntl(Rdst, Rsrc); 5193 __ jccb(Assembler::carryClear, done); 5194 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5195 __ addl(Rdst, BitsPerInt); 5196 __ bind(done); 5197 %} 5198 ins_pipe(ialu_reg); 5199 %} 5200 5201 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5202 
predicate(!UseCountTrailingZerosInstruction); 5203 match(Set dst (CountTrailingZerosL src)); 5204 effect(TEMP dst, KILL cr); 5205 5206 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5207 "JNZ done\n\t" 5208 "BSF $dst, $src.hi\n\t" 5209 "JNZ msw_not_zero\n\t" 5210 "MOV $dst, 32\n" 5211 "msw_not_zero:\n\t" 5212 "ADD $dst, 32\n" 5213 "done:" %} 5214 ins_encode %{ 5215 Register Rdst = $dst$$Register; 5216 Register Rsrc = $src$$Register; 5217 Label msw_not_zero; 5218 Label done; 5219 __ bsfl(Rdst, Rsrc); 5220 __ jccb(Assembler::notZero, done); 5221 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5222 __ jccb(Assembler::notZero, msw_not_zero); 5223 __ movl(Rdst, BitsPerInt); 5224 __ bind(msw_not_zero); 5225 __ addl(Rdst, BitsPerInt); 5226 __ bind(done); 5227 %} 5228 ins_pipe(ialu_reg); 5229 %} 5230 5231 5232 //---------- Population Count Instructions ------------------------------------- 5233 5234 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5235 predicate(UsePopCountInstruction); 5236 match(Set dst (PopCountI src)); 5237 effect(KILL cr); 5238 5239 format %{ "POPCNT $dst, $src" %} 5240 ins_encode %{ 5241 __ popcntl($dst$$Register, $src$$Register); 5242 %} 5243 ins_pipe(ialu_reg); 5244 %} 5245 5246 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5247 predicate(UsePopCountInstruction); 5248 match(Set dst (PopCountI (LoadI mem))); 5249 effect(KILL cr); 5250 5251 format %{ "POPCNT $dst, $mem" %} 5252 ins_encode %{ 5253 __ popcntl($dst$$Register, $mem$$Address); 5254 %} 5255 ins_pipe(ialu_reg); 5256 %} 5257 5258 // Note: Long.bitCount(long) returns an int. 
5259 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 5260 predicate(UsePopCountInstruction); 5261 match(Set dst (PopCountL src)); 5262 effect(KILL cr, TEMP tmp, TEMP dst); 5263 5264 format %{ "POPCNT $dst, $src.lo\n\t" 5265 "POPCNT $tmp, $src.hi\n\t" 5266 "ADD $dst, $tmp" %} 5267 ins_encode %{ 5268 __ popcntl($dst$$Register, $src$$Register); 5269 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 5270 __ addl($dst$$Register, $tmp$$Register); 5271 %} 5272 ins_pipe(ialu_reg); 5273 %} 5274 5275 // Note: Long.bitCount(long) returns an int. 5276 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ 5277 predicate(UsePopCountInstruction); 5278 match(Set dst (PopCountL (LoadL mem))); 5279 effect(KILL cr, TEMP tmp, TEMP dst); 5280 5281 format %{ "POPCNT $dst, $mem\n\t" 5282 "POPCNT $tmp, $mem+4\n\t" 5283 "ADD $dst, $tmp" %} 5284 ins_encode %{ 5285 //__ popcntl($dst$$Register, $mem$$Address$$first); 5286 //__ popcntl($tmp$$Register, $mem$$Address$$second); 5287 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); 5288 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); 5289 __ addl($dst$$Register, $tmp$$Register); 5290 %} 5291 ins_pipe(ialu_reg); 5292 %} 5293 5294 5295 //----------Load/Store/Move Instructions--------------------------------------- 5296 //----------Load Instructions-------------------------------------------------- 5297 // Load Byte (8bit signed) 5298 instruct loadB(xRegI dst, memory mem) %{ 5299 match(Set dst (LoadB mem)); 5300 5301 ins_cost(125); 5302 format %{ "MOVSX8 $dst,$mem\t# byte" %} 5303 5304 ins_encode %{ 5305 __ movsbl($dst$$Register, $mem$$Address); 5306 %} 5307 5308 ins_pipe(ialu_reg_mem); 5309 %} 5310 5311 // Load Byte (8bit signed) into Long Register 5312 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5313 match(Set dst (ConvI2L (LoadB mem))); 5314 effect(KILL 
cr); 5315 5316 ins_cost(375); 5317 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" 5318 "MOV $dst.hi,$dst.lo\n\t" 5319 "SAR $dst.hi,7" %} 5320 5321 ins_encode %{ 5322 __ movsbl($dst$$Register, $mem$$Address); 5323 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5324 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. 5325 %} 5326 5327 ins_pipe(ialu_reg_mem); 5328 %} 5329 5330 // Load Unsigned Byte (8bit UNsigned) 5331 instruct loadUB(xRegI dst, memory mem) %{ 5332 match(Set dst (LoadUB mem)); 5333 5334 ins_cost(125); 5335 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} 5336 5337 ins_encode %{ 5338 __ movzbl($dst$$Register, $mem$$Address); 5339 %} 5340 5341 ins_pipe(ialu_reg_mem); 5342 %} 5343 5344 // Load Unsigned Byte (8 bit UNsigned) into Long Register 5345 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5346 match(Set dst (ConvI2L (LoadUB mem))); 5347 effect(KILL cr); 5348 5349 ins_cost(250); 5350 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" 5351 "XOR $dst.hi,$dst.hi" %} 5352 5353 ins_encode %{ 5354 Register Rdst = $dst$$Register; 5355 __ movzbl(Rdst, $mem$$Address); 5356 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5357 %} 5358 5359 ins_pipe(ialu_reg_mem); 5360 %} 5361 5362 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register 5363 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5364 match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); 5365 effect(KILL cr); 5366 5367 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" 5368 "XOR $dst.hi,$dst.hi\n\t" 5369 "AND $dst.lo,right_n_bits($mask, 8)" %} 5370 ins_encode %{ 5371 Register Rdst = $dst$$Register; 5372 __ movzbl(Rdst, $mem$$Address); 5373 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5374 __ andl(Rdst, $mask$$constant & right_n_bits(8)); 5375 %} 5376 ins_pipe(ialu_reg_mem); 5377 %} 5378 5379 // Load Short (16bit signed) 5380 instruct loadS(rRegI 
dst, memory mem) %{ 5381 match(Set dst (LoadS mem)); 5382 5383 ins_cost(125); 5384 format %{ "MOVSX $dst,$mem\t# short" %} 5385 5386 ins_encode %{ 5387 __ movswl($dst$$Register, $mem$$Address); 5388 %} 5389 5390 ins_pipe(ialu_reg_mem); 5391 %} 5392 5393 // Load Short (16 bit signed) to Byte (8 bit signed) 5394 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5395 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5396 5397 ins_cost(125); 5398 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5399 ins_encode %{ 5400 __ movsbl($dst$$Register, $mem$$Address); 5401 %} 5402 ins_pipe(ialu_reg_mem); 5403 %} 5404 5405 // Load Short (16bit signed) into Long Register 5406 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5407 match(Set dst (ConvI2L (LoadS mem))); 5408 effect(KILL cr); 5409 5410 ins_cost(375); 5411 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5412 "MOV $dst.hi,$dst.lo\n\t" 5413 "SAR $dst.hi,15" %} 5414 5415 ins_encode %{ 5416 __ movswl($dst$$Register, $mem$$Address); 5417 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5418 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 
5419 %} 5420 5421 ins_pipe(ialu_reg_mem); 5422 %} 5423 5424 // Load Unsigned Short/Char (16bit unsigned) 5425 instruct loadUS(rRegI dst, memory mem) %{ 5426 match(Set dst (LoadUS mem)); 5427 5428 ins_cost(125); 5429 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5430 5431 ins_encode %{ 5432 __ movzwl($dst$$Register, $mem$$Address); 5433 %} 5434 5435 ins_pipe(ialu_reg_mem); 5436 %} 5437 5438 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5439 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5440 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5441 5442 ins_cost(125); 5443 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5444 ins_encode %{ 5445 __ movsbl($dst$$Register, $mem$$Address); 5446 %} 5447 ins_pipe(ialu_reg_mem); 5448 %} 5449 5450 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5451 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5452 match(Set dst (ConvI2L (LoadUS mem))); 5453 effect(KILL cr); 5454 5455 ins_cost(250); 5456 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5457 "XOR $dst.hi,$dst.hi" %} 5458 5459 ins_encode %{ 5460 __ movzwl($dst$$Register, $mem$$Address); 5461 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5462 %} 5463 5464 ins_pipe(ialu_reg_mem); 5465 %} 5466 5467 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5468 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5469 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5470 effect(KILL cr); 5471 5472 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5473 "XOR $dst.hi,$dst.hi" %} 5474 ins_encode %{ 5475 Register Rdst = $dst$$Register; 5476 __ movzbl(Rdst, $mem$$Address); 5477 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5478 %} 5479 ins_pipe(ialu_reg_mem); 5480 %} 5481 5482 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5483 instruct 
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5484 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5485 effect(KILL cr); 5486 5487 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5488 "XOR $dst.hi,$dst.hi\n\t" 5489 "AND $dst.lo,right_n_bits($mask, 16)" %} 5490 ins_encode %{ 5491 Register Rdst = $dst$$Register; 5492 __ movzwl(Rdst, $mem$$Address); 5493 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5494 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5495 %} 5496 ins_pipe(ialu_reg_mem); 5497 %} 5498 5499 // Load Integer 5500 instruct loadI(rRegI dst, memory mem) %{ 5501 match(Set dst (LoadI mem)); 5502 5503 ins_cost(125); 5504 format %{ "MOV $dst,$mem\t# int" %} 5505 5506 ins_encode %{ 5507 __ movl($dst$$Register, $mem$$Address); 5508 %} 5509 5510 ins_pipe(ialu_reg_mem); 5511 %} 5512 5513 // Load Integer (32 bit signed) to Byte (8 bit signed) 5514 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5515 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5516 5517 ins_cost(125); 5518 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5519 ins_encode %{ 5520 __ movsbl($dst$$Register, $mem$$Address); 5521 %} 5522 ins_pipe(ialu_reg_mem); 5523 %} 5524 5525 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5526 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5527 match(Set dst (AndI (LoadI mem) mask)); 5528 5529 ins_cost(125); 5530 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5531 ins_encode %{ 5532 __ movzbl($dst$$Register, $mem$$Address); 5533 %} 5534 ins_pipe(ialu_reg_mem); 5535 %} 5536 5537 // Load Integer (32 bit signed) to Short (16 bit signed) 5538 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5539 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5540 5541 ins_cost(125); 5542 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5543 ins_encode %{ 5544 __ movswl($dst$$Register, $mem$$Address); 5545 %} 5546 ins_pipe(ialu_reg_mem); 5547 
%} 5548 5549 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5550 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5551 match(Set dst (AndI (LoadI mem) mask)); 5552 5553 ins_cost(125); 5554 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5555 ins_encode %{ 5556 __ movzwl($dst$$Register, $mem$$Address); 5557 %} 5558 ins_pipe(ialu_reg_mem); 5559 %} 5560 5561 // Load Integer into Long Register 5562 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5563 match(Set dst (ConvI2L (LoadI mem))); 5564 effect(KILL cr); 5565 5566 ins_cost(375); 5567 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5568 "MOV $dst.hi,$dst.lo\n\t" 5569 "SAR $dst.hi,31" %} 5570 5571 ins_encode %{ 5572 __ movl($dst$$Register, $mem$$Address); 5573 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5574 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5575 %} 5576 5577 ins_pipe(ialu_reg_mem); 5578 %} 5579 5580 // Load Integer with mask 0xFF into Long Register 5581 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5582 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5583 effect(KILL cr); 5584 5585 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5586 "XOR $dst.hi,$dst.hi" %} 5587 ins_encode %{ 5588 Register Rdst = $dst$$Register; 5589 __ movzbl(Rdst, $mem$$Address); 5590 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5591 %} 5592 ins_pipe(ialu_reg_mem); 5593 %} 5594 5595 // Load Integer with mask 0xFFFF into Long Register 5596 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5597 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5598 effect(KILL cr); 5599 5600 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5601 "XOR $dst.hi,$dst.hi" %} 5602 ins_encode %{ 5603 Register Rdst = $dst$$Register; 5604 __ movzwl(Rdst, $mem$$Address); 5605 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5606 %} 5607 ins_pipe(ialu_reg_mem); 
5608 %} 5609 5610 // Load Integer with 31-bit mask into Long Register 5611 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{ 5612 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5613 effect(KILL cr); 5614 5615 format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t" 5616 "XOR $dst.hi,$dst.hi\n\t" 5617 "AND $dst.lo,$mask" %} 5618 ins_encode %{ 5619 Register Rdst = $dst$$Register; 5620 __ movl(Rdst, $mem$$Address); 5621 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5622 __ andl(Rdst, $mask$$constant); 5623 %} 5624 ins_pipe(ialu_reg_mem); 5625 %} 5626 5627 // Load Unsigned Integer into Long Register 5628 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{ 5629 match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); 5630 effect(KILL cr); 5631 5632 ins_cost(250); 5633 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" 5634 "XOR $dst.hi,$dst.hi" %} 5635 5636 ins_encode %{ 5637 __ movl($dst$$Register, $mem$$Address); 5638 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5639 %} 5640 5641 ins_pipe(ialu_reg_mem); 5642 %} 5643 5644 // Load Long. Cannot clobber address while loading, so restrict address 5645 // register to ESI 5646 instruct loadL(eRegL dst, load_long_memory mem) %{ 5647 predicate(!((LoadLNode*)n)->require_atomic_access()); 5648 match(Set dst (LoadL mem)); 5649 5650 ins_cost(250); 5651 format %{ "MOV $dst.lo,$mem\t# long\n\t" 5652 "MOV $dst.hi,$mem+4" %} 5653 5654 ins_encode %{ 5655 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); 5656 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); 5657 __ movl($dst$$Register, Amemlo); 5658 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); 5659 %} 5660 5661 ins_pipe(ialu_reg_long_mem); 5662 %} 5663 5664 // Volatile Load Long. Must be atomic, so do 64-bit FILD 5665 // then store it down to the stack and reload on the int 5666 // side. 
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  // Atomic 64-bit load via the x87 unit: a single FILD/FISTp pair moves
  // all 8 bytes at once.  Only used when SSE2 is unavailable (UseSSE<=1)
  // and the load node requires atomic access.
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  // Atomic 64-bit load via an XMM register (SSE2): one 8-byte MOVSD from
  // memory into $tmp, then one 8-byte store into the stack slot $dst.
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  // Atomic 64-bit load directly into an integer register pair: load the
  // whole value atomically into $tmp, then split it into the two 32-bit
  // halves with MOVD / PSRLQ / MOVD.
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);                // low 32 bits
    __ psrlq($tmp$$XMMRegister, 32);                            // bring high half down
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); // high 32 bits
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);  // MOV r32,r/m32
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);  // MOV r32,r/m32
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);  // MOV r32,r/m32
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 path, UseSSE<=1): push from memory, pop into $dst.
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadD_partial(regD dst, memory mem) %{
  // Variant for !UseXmmLoadAndClearUpper CPUs.  NOTE(review): the format
  // string says MOVLPD while the encode calls movdbl(); presumably the
  // MacroAssembler selects MOVLPD vs MOVSD from UseXmmLoadAndClearUpper --
  // confirm against MacroAssembler::movdbl before changing.
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 path, UseSSE==0): push from memory, pop into $dst.
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address -- one instruct per addressing-mode operand so the
// matcher can fold address arithmetic into a single LEA.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);  // LEA r32,m
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);  // LEA r32,m
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);  // LEA r32,m
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);  // LEA r32,m
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);  // LEA r32,m
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( SetInstMark, LdImmI(dst, src), ClearInstMark );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero -- XOR reg,reg is shorter than MOV reg,0 but
// clobbers the flags, hence the KILL cr effect.
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);  /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);  /* + rd */
  ins_encode( SetInstMark, LdImmP(dst, src), ClearInstMark );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant: two 32-bit immediate moves, one per register half.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load long zero: XOR both halves (flags clobbered, see KILL cr).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();     // push +0.0 -- no constant-table load needed
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
// SSE path: load a float constant from the constant table into an XMM register.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// Zero an XMM register with XORPS instead of a constant-table load.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load a long from a stack slot: two 32-bit loads (lo at $src, hi at $src+4).
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// Exactly one of these matches, selected by UseSSE / AllocatePrefetchInstr.

instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
// xRegI restricts src to the byte-addressable registers (required by MOV r/m8).
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// 0x66 operand-size prefix (OpcS, emitted first) turns MOV r/m32 into r/m16.
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( SetInstMark, OpcS, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
// Non-atomic two-word store; the volatile (atomic) case is handled below.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// Only the low word of the long is stored; ConvL2I needs no extra code.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( SetInstMark, OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: bounce the long through an XMM register for an atomic
// 64-bit MOVSD store. Source is a stack slot.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant with the long already in a GPR pair: pack lo/hi halves
// into one XMM register (PUNPCKLDQ) then store atomically with MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SetInstMark, SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32( src ), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87 path; src is pinned to FPR1/top-of-stack)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
// (so RoundDouble folds into the store for free)
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// Store Float (x87 path; src is pinned to FPR1/top-of-stack)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// (FST_S narrows to single precision, so ConvD2F folds into the store)
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot (two 32-bit stores: lo at $dst, hi at $dst+4)
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors
// On x86 only StoreLoad needs a real instruction; the rest are
// compile-time ordering constraints and emit nothing.

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// StoreLoad barrier: emitted as a locked add to the stack (see
// MacroAssembler::membar), which is cheaper than MFENCE.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elide the barrier when a following instruction already provides the
// StoreLoad ordering (see Matcher::post_store_load_barrier).
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// CastX2P is a no-op: src and dst are both constrained to EAX.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
// jmov* variants emulate CMOV with a short branch on pre-P6 hardware.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move (x87 FCMOV; dst is pinned to the FPU top-of-stack)
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed-compare variant: FCMOV only exists for unsigned conditions, so
// emulate with an inverted short branch around an x87 copy (FLD/FSTP).
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  // FIX: disassembly annotation said "# float" although this instruct
  // matches CMoveD and emits MOVSD (a double move).
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  // FIX: disassembly annotation said "# float" although this instruct
  // matches CMoveD and emits MOVSD (a double move).
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move: one CMOV per 32-bit half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// One-byte INC r32 (0x40+rd); only when UseIncDec allows it.
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: no flags clobbered, dst may differ from src0.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
  ins_pipe( ialu_reg_reg );
%}

// One-byte DEC r32 (0x48+rd), matching "add -1".
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
7007 %} 7008 7009 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7010 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7011 effect(KILL cr); 7012 7013 ins_cost(150); 7014 format %{ "ADD $dst,$src" %} 7015 opcode(0x01); /* Opcode 01 /r */ 7016 ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); 7017 ins_pipe( ialu_mem_reg ); 7018 %} 7019 7020 // Add Memory with Immediate 7021 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7022 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7023 effect(KILL cr); 7024 7025 ins_cost(125); 7026 format %{ "ADD $dst,$src" %} 7027 opcode(0x81); /* Opcode 81 /0 id */ 7028 ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32(src), ClearInstMark ); 7029 ins_pipe( ialu_mem_imm ); 7030 %} 7031 7032 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{ 7033 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7034 effect(KILL cr); 7035 7036 ins_cost(125); 7037 format %{ "INC $dst" %} 7038 opcode(0xFF); /* Opcode FF /0 */ 7039 ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,dst), ClearInstMark); 7040 ins_pipe( ialu_mem_imm ); 7041 %} 7042 7043 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7044 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7045 effect(KILL cr); 7046 7047 ins_cost(125); 7048 format %{ "DEC $dst" %} 7049 opcode(0xFF); /* Opcode FF /1 */ 7050 ins_encode( SetInstMark, OpcP, RMopc_Mem(0x01,dst), ClearInstMark); 7051 ins_pipe( ialu_mem_imm ); 7052 %} 7053 7054 7055 instruct checkCastPP( eRegP dst ) %{ 7056 match(Set dst (CheckCastPP dst)); 7057 7058 size(0); 7059 format %{ "#checkcastPP of $dst" %} 7060 ins_encode( /*empty encoding*/ ); 7061 ins_pipe( empty ); 7062 %} 7063 7064 instruct castPP( eRegP dst ) %{ 7065 match(Set dst (CastPP dst)); 7066 format %{ "#castPP of $dst" %} 7067 ins_encode( /*empty encoding*/ ); 7068 ins_pipe( empty ); 7069 %} 7070 7071 instruct castII( rRegI dst ) %{ 7072 match(Set dst (CastII dst)); 7073 format %{ "#castII 
of $dst" %} 7074 ins_encode( /*empty encoding*/ ); 7075 ins_cost(0); 7076 ins_pipe( empty ); 7077 %} 7078 7079 instruct castLL( eRegL dst ) %{ 7080 match(Set dst (CastLL dst)); 7081 format %{ "#castLL of $dst" %} 7082 ins_encode( /*empty encoding*/ ); 7083 ins_cost(0); 7084 ins_pipe( empty ); 7085 %} 7086 7087 instruct castFF( regF dst ) %{ 7088 predicate(UseSSE >= 1); 7089 match(Set dst (CastFF dst)); 7090 format %{ "#castFF of $dst" %} 7091 ins_encode( /*empty encoding*/ ); 7092 ins_cost(0); 7093 ins_pipe( empty ); 7094 %} 7095 7096 instruct castDD( regD dst ) %{ 7097 predicate(UseSSE >= 2); 7098 match(Set dst (CastDD dst)); 7099 format %{ "#castDD of $dst" %} 7100 ins_encode( /*empty encoding*/ ); 7101 ins_cost(0); 7102 ins_pipe( empty ); 7103 %} 7104 7105 instruct castFF_PR( regFPR dst ) %{ 7106 predicate(UseSSE < 1); 7107 match(Set dst (CastFF dst)); 7108 format %{ "#castFF of $dst" %} 7109 ins_encode( /*empty encoding*/ ); 7110 ins_cost(0); 7111 ins_pipe( empty ); 7112 %} 7113 7114 instruct castDD_PR( regDPR dst ) %{ 7115 predicate(UseSSE < 2); 7116 match(Set dst (CastDD dst)); 7117 format %{ "#castDD of $dst" %} 7118 ins_encode( /*empty encoding*/ ); 7119 ins_cost(0); 7120 ins_pipe( empty ); 7121 %} 7122 7123 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7124 7125 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7126 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7127 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval))); 7128 effect(KILL cr, KILL oldval); 7129 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7130 "MOV $res,0\n\t" 7131 "JNE,s fail\n\t" 7132 "MOV $res,1\n" 7133 "fail:" %} 7134 ins_encode( enc_cmpxchg8(mem_ptr), 7135 enc_flags_ne_to_boolean(res) ); 7136 ins_pipe( pipe_cmpxchg ); 7137 %} 7138 7139 instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, 
eCXRegP newval, eFlagsReg cr) %{ 7140 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 7141 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); 7142 effect(KILL cr, KILL oldval); 7143 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7144 "MOV $res,0\n\t" 7145 "JNE,s fail\n\t" 7146 "MOV $res,1\n" 7147 "fail:" %} 7148 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7149 ins_pipe( pipe_cmpxchg ); 7150 %} 7151 7152 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7153 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval))); 7154 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval))); 7155 effect(KILL cr, KILL oldval); 7156 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7157 "MOV $res,0\n\t" 7158 "JNE,s fail\n\t" 7159 "MOV $res,1\n" 7160 "fail:" %} 7161 ins_encode( enc_cmpxchgb(mem_ptr), 7162 enc_flags_ne_to_boolean(res) ); 7163 ins_pipe( pipe_cmpxchg ); 7164 %} 7165 7166 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7167 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval))); 7168 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval))); 7169 effect(KILL cr, KILL oldval); 7170 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7171 "MOV $res,0\n\t" 7172 "JNE,s fail\n\t" 7173 "MOV $res,1\n" 7174 "fail:" %} 7175 ins_encode( enc_cmpxchgw(mem_ptr), 7176 enc_flags_ne_to_boolean(res) ); 7177 ins_pipe( pipe_cmpxchg ); 7178 %} 7179 7180 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7181 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 7182 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval))); 7183 effect(KILL cr, KILL oldval); 7184 format %{ 
"CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7185 "MOV $res,0\n\t" 7186 "JNE,s fail\n\t" 7187 "MOV $res,1\n" 7188 "fail:" %} 7189 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7190 ins_pipe( pipe_cmpxchg ); 7191 %} 7192 7193 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7194 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval))); 7195 effect(KILL cr); 7196 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7197 ins_encode( enc_cmpxchg8(mem_ptr) ); 7198 ins_pipe( pipe_cmpxchg ); 7199 %} 7200 7201 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7202 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); 7203 effect(KILL cr); 7204 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7205 ins_encode( enc_cmpxchg(mem_ptr) ); 7206 ins_pipe( pipe_cmpxchg ); 7207 %} 7208 7209 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7210 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval))); 7211 effect(KILL cr); 7212 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7213 ins_encode( enc_cmpxchgb(mem_ptr) ); 7214 ins_pipe( pipe_cmpxchg ); 7215 %} 7216 7217 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7218 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval))); 7219 effect(KILL cr); 7220 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7221 ins_encode( enc_cmpxchgw(mem_ptr) ); 7222 ins_pipe( pipe_cmpxchg ); 7223 %} 7224 7225 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7226 match(Set oldval 
(CompareAndExchangeI mem_ptr (Binary oldval newval))); 7227 effect(KILL cr); 7228 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7229 ins_encode( enc_cmpxchg(mem_ptr) ); 7230 ins_pipe( pipe_cmpxchg ); 7231 %} 7232 7233 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7234 predicate(n->as_LoadStore()->result_not_used()); 7235 match(Set dummy (GetAndAddB mem add)); 7236 effect(KILL cr); 7237 format %{ "ADDB [$mem],$add" %} 7238 ins_encode %{ 7239 __ lock(); 7240 __ addb($mem$$Address, $add$$constant); 7241 %} 7242 ins_pipe( pipe_cmpxchg ); 7243 %} 7244 7245 // Important to match to xRegI: only 8-bit regs. 7246 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{ 7247 match(Set newval (GetAndAddB mem newval)); 7248 effect(KILL cr); 7249 format %{ "XADDB [$mem],$newval" %} 7250 ins_encode %{ 7251 __ lock(); 7252 __ xaddb($mem$$Address, $newval$$Register); 7253 %} 7254 ins_pipe( pipe_cmpxchg ); 7255 %} 7256 7257 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7258 predicate(n->as_LoadStore()->result_not_used()); 7259 match(Set dummy (GetAndAddS mem add)); 7260 effect(KILL cr); 7261 format %{ "ADDS [$mem],$add" %} 7262 ins_encode %{ 7263 __ lock(); 7264 __ addw($mem$$Address, $add$$constant); 7265 %} 7266 ins_pipe( pipe_cmpxchg ); 7267 %} 7268 7269 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{ 7270 match(Set newval (GetAndAddS mem newval)); 7271 effect(KILL cr); 7272 format %{ "XADDS [$mem],$newval" %} 7273 ins_encode %{ 7274 __ lock(); 7275 __ xaddw($mem$$Address, $newval$$Register); 7276 %} 7277 ins_pipe( pipe_cmpxchg ); 7278 %} 7279 7280 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7281 predicate(n->as_LoadStore()->result_not_used()); 7282 match(Set dummy (GetAndAddI mem add)); 7283 effect(KILL cr); 7284 format %{ "ADDL [$mem],$add" %} 7285 ins_encode %{ 7286 __ lock(); 7287 __ addl($mem$$Address, 
$add$$constant); 7288 %} 7289 ins_pipe( pipe_cmpxchg ); 7290 %} 7291 7292 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ 7293 match(Set newval (GetAndAddI mem newval)); 7294 effect(KILL cr); 7295 format %{ "XADDL [$mem],$newval" %} 7296 ins_encode %{ 7297 __ lock(); 7298 __ xaddl($mem$$Address, $newval$$Register); 7299 %} 7300 ins_pipe( pipe_cmpxchg ); 7301 %} 7302 7303 // Important to match to xRegI: only 8-bit regs. 7304 instruct xchgB( memory mem, xRegI newval) %{ 7305 match(Set newval (GetAndSetB mem newval)); 7306 format %{ "XCHGB $newval,[$mem]" %} 7307 ins_encode %{ 7308 __ xchgb($newval$$Register, $mem$$Address); 7309 %} 7310 ins_pipe( pipe_cmpxchg ); 7311 %} 7312 7313 instruct xchgS( memory mem, rRegI newval) %{ 7314 match(Set newval (GetAndSetS mem newval)); 7315 format %{ "XCHGW $newval,[$mem]" %} 7316 ins_encode %{ 7317 __ xchgw($newval$$Register, $mem$$Address); 7318 %} 7319 ins_pipe( pipe_cmpxchg ); 7320 %} 7321 7322 instruct xchgI( memory mem, rRegI newval) %{ 7323 match(Set newval (GetAndSetI mem newval)); 7324 format %{ "XCHGL $newval,[$mem]" %} 7325 ins_encode %{ 7326 __ xchgl($newval$$Register, $mem$$Address); 7327 %} 7328 ins_pipe( pipe_cmpxchg ); 7329 %} 7330 7331 instruct xchgP( memory mem, pRegP newval) %{ 7332 match(Set newval (GetAndSetP mem newval)); 7333 format %{ "XCHGL $newval,[$mem]" %} 7334 ins_encode %{ 7335 __ xchgl($newval$$Register, $mem$$Address); 7336 %} 7337 ins_pipe( pipe_cmpxchg ); 7338 %} 7339 7340 //----------Subtraction Instructions------------------------------------------- 7341 7342 // Integer Subtraction Instructions 7343 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7344 match(Set dst (SubI dst src)); 7345 effect(KILL cr); 7346 7347 size(2); 7348 format %{ "SUB $dst,$src" %} 7349 opcode(0x2B); 7350 ins_encode( OpcP, RegReg( dst, src) ); 7351 ins_pipe( ialu_reg_reg ); 7352 %} 7353 7354 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7355 match(Set dst (SubI dst src)); 7356 
effect(KILL cr); 7357 7358 format %{ "SUB $dst,$src" %} 7359 opcode(0x81,0x05); /* Opcode 81 /5 */ 7360 // ins_encode( RegImm( dst, src) ); 7361 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7362 ins_pipe( ialu_reg ); 7363 %} 7364 7365 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7366 match(Set dst (SubI dst (LoadI src))); 7367 effect(KILL cr); 7368 7369 ins_cost(150); 7370 format %{ "SUB $dst,$src" %} 7371 opcode(0x2B); 7372 ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark ); 7373 ins_pipe( ialu_reg_mem ); 7374 %} 7375 7376 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7377 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 7378 effect(KILL cr); 7379 7380 ins_cost(150); 7381 format %{ "SUB $dst,$src" %} 7382 opcode(0x29); /* Opcode 29 /r */ 7383 ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); 7384 ins_pipe( ialu_mem_reg ); 7385 %} 7386 7387 // Subtract from a pointer 7388 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{ 7389 match(Set dst (AddP dst (SubI zero src))); 7390 effect(KILL cr); 7391 7392 size(2); 7393 format %{ "SUB $dst,$src" %} 7394 opcode(0x2B); 7395 ins_encode( OpcP, RegReg( dst, src) ); 7396 ins_pipe( ialu_reg_reg ); 7397 %} 7398 7399 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{ 7400 match(Set dst (SubI zero dst)); 7401 effect(KILL cr); 7402 7403 size(2); 7404 format %{ "NEG $dst" %} 7405 opcode(0xF7,0x03); // Opcode F7 /3 7406 ins_encode( OpcP, RegOpc( dst ) ); 7407 ins_pipe( ialu_reg ); 7408 %} 7409 7410 //----------Multiplication/Division Instructions------------------------------- 7411 // Integer Multiplication Instructions 7412 // Multiply Register 7413 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7414 match(Set dst (MulI dst src)); 7415 effect(KILL cr); 7416 7417 size(3); 7418 ins_cost(300); 7419 format %{ "IMUL $dst,$src" %} 7420 opcode(0xAF, 0x0F); 7421 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 7422 ins_pipe( 
ialu_reg_reg_alu0 ); 7423 %} 7424 7425 // Multiply 32-bit Immediate 7426 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ 7427 match(Set dst (MulI src imm)); 7428 effect(KILL cr); 7429 7430 ins_cost(300); 7431 format %{ "IMUL $dst,$src,$imm" %} 7432 opcode(0x69); /* 69 /r id */ 7433 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 7434 ins_pipe( ialu_reg_reg_alu0 ); 7435 %} 7436 7437 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 7438 match(Set dst src); 7439 effect(KILL cr); 7440 7441 // Note that this is artificially increased to make it more expensive than loadConL 7442 ins_cost(250); 7443 format %{ "MOV EAX,$src\t// low word only" %} 7444 opcode(0xB8); 7445 ins_encode( LdImmL_Lo(dst, src) ); 7446 ins_pipe( ialu_reg_fat ); 7447 %} 7448 7449 // Multiply by 32-bit Immediate, taking the shifted high order results 7450 // (special case for shift by 32) 7451 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 7452 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7453 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7454 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7455 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7456 effect(USE src1, KILL cr); 7457 7458 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7459 ins_cost(0*100 + 1*400 - 150); 7460 format %{ "IMUL EDX:EAX,$src1" %} 7461 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7462 ins_pipe( pipe_slow ); 7463 %} 7464 7465 // Multiply by 32-bit Immediate, taking the shifted high order results 7466 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 7467 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7468 predicate( 
_kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7469 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7470 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7471 effect(USE src1, KILL cr); 7472 7473 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7474 ins_cost(1*100 + 1*400 - 150); 7475 format %{ "IMUL EDX:EAX,$src1\n\t" 7476 "SAR EDX,$cnt-32" %} 7477 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7478 ins_pipe( pipe_slow ); 7479 %} 7480 7481 // Multiply Memory 32-bit Immediate 7482 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ 7483 match(Set dst (MulI (LoadI src) imm)); 7484 effect(KILL cr); 7485 7486 ins_cost(300); 7487 format %{ "IMUL $dst,$src,$imm" %} 7488 opcode(0x69); /* 69 /r id */ 7489 ins_encode( SetInstMark, OpcSE(imm), RegMem( dst, src ), Con8or32( imm ), ClearInstMark ); 7490 ins_pipe( ialu_reg_mem_alu0 ); 7491 %} 7492 7493 // Multiply Memory 7494 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ 7495 match(Set dst (MulI dst (LoadI src))); 7496 effect(KILL cr); 7497 7498 ins_cost(350); 7499 format %{ "IMUL $dst,$src" %} 7500 opcode(0xAF, 0x0F); 7501 ins_encode( SetInstMark, OpcS, OpcP, RegMem( dst, src), ClearInstMark ); 7502 ins_pipe( ialu_reg_mem_alu0 ); 7503 %} 7504 7505 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr) 7506 %{ 7507 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3))); 7508 effect(KILL cr, KILL src2); 7509 7510 expand %{ mulI_eReg(dst, src1, cr); 7511 mulI_eReg(src2, src3, cr); 7512 addI_eReg(dst, src2, cr); %} 7513 %} 7514 7515 // Multiply Register Int to Long 7516 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 7517 // Basic Idea: long = (long)int * (long)int 7518 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 7519 effect(DEF dst, USE src, USE src1, KILL 
flags); 7520 7521 ins_cost(300); 7522 format %{ "IMUL $dst,$src1" %} 7523 7524 ins_encode( long_int_multiply( dst, src1 ) ); 7525 ins_pipe( ialu_reg_reg_alu0 ); 7526 %} 7527 7528 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 7529 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 7530 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 7531 effect(KILL flags); 7532 7533 ins_cost(300); 7534 format %{ "MUL $dst,$src1" %} 7535 7536 ins_encode( long_uint_multiply(dst, src1) ); 7537 ins_pipe( ialu_reg_reg_alu0 ); 7538 %} 7539 7540 // Multiply Register Long 7541 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7542 match(Set dst (MulL dst src)); 7543 effect(KILL cr, TEMP tmp); 7544 ins_cost(4*100+3*400); 7545 // Basic idea: lo(result) = lo(x_lo * y_lo) 7546 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 7547 format %{ "MOV $tmp,$src.lo\n\t" 7548 "IMUL $tmp,EDX\n\t" 7549 "MOV EDX,$src.hi\n\t" 7550 "IMUL EDX,EAX\n\t" 7551 "ADD $tmp,EDX\n\t" 7552 "MUL EDX:EAX,$src.lo\n\t" 7553 "ADD EDX,$tmp" %} 7554 ins_encode( long_multiply( dst, src, tmp ) ); 7555 ins_pipe( pipe_slow ); 7556 %} 7557 7558 // Multiply Register Long where the left operand's high 32 bits are zero 7559 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7560 predicate(is_operand_hi32_zero(n->in(1))); 7561 match(Set dst (MulL dst src)); 7562 effect(KILL cr, TEMP tmp); 7563 ins_cost(2*100+2*400); 7564 // Basic idea: lo(result) = lo(x_lo * y_lo) 7565 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 7566 format %{ "MOV $tmp,$src.hi\n\t" 7567 "IMUL $tmp,EAX\n\t" 7568 "MUL EDX:EAX,$src.lo\n\t" 7569 "ADD EDX,$tmp" %} 7570 ins_encode %{ 7571 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 7572 __ imull($tmp$$Register, rax); 7573 __ mull($src$$Register); 7574 __ addl(rdx, $tmp$$Register); 7575 %} 7576 ins_pipe( 
pipe_slow ); 7577 %} 7578 7579 // Multiply Register Long where the right operand's high 32 bits are zero 7580 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7581 predicate(is_operand_hi32_zero(n->in(2))); 7582 match(Set dst (MulL dst src)); 7583 effect(KILL cr, TEMP tmp); 7584 ins_cost(2*100+2*400); 7585 // Basic idea: lo(result) = lo(x_lo * y_lo) 7586 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 7587 format %{ "MOV $tmp,$src.lo\n\t" 7588 "IMUL $tmp,EDX\n\t" 7589 "MUL EDX:EAX,$src.lo\n\t" 7590 "ADD EDX,$tmp" %} 7591 ins_encode %{ 7592 __ movl($tmp$$Register, $src$$Register); 7593 __ imull($tmp$$Register, rdx); 7594 __ mull($src$$Register); 7595 __ addl(rdx, $tmp$$Register); 7596 %} 7597 ins_pipe( pipe_slow ); 7598 %} 7599 7600 // Multiply Register Long where the left and the right operands' high 32 bits are zero 7601 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ 7602 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); 7603 match(Set dst (MulL dst src)); 7604 effect(KILL cr); 7605 ins_cost(1*400); 7606 // Basic idea: lo(result) = lo(x_lo * y_lo) 7607 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 7608 format %{ "MUL EDX:EAX,$src.lo\n\t" %} 7609 ins_encode %{ 7610 __ mull($src$$Register); 7611 %} 7612 ins_pipe( pipe_slow ); 7613 %} 7614 7615 // Multiply Register Long by small constant 7616 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ 7617 match(Set dst (MulL dst src)); 7618 effect(KILL cr, TEMP tmp); 7619 ins_cost(2*100+2*400); 7620 size(12); 7621 // Basic idea: lo(result) = lo(src * EAX) 7622 // hi(result) = hi(src * EAX) + lo(src * EDX) 7623 format %{ "IMUL $tmp,EDX,$src\n\t" 7624 "MOV EDX,$src\n\t" 7625 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 7626 "ADD EDX,$tmp" %} 7627 ins_encode( long_multiply_con( dst, src, tmp ) ); 7628 ins_pipe( pipe_slow ); 7629 
%} 7630 7631 // Integer DIV with Register 7632 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7633 match(Set rax (DivI rax div)); 7634 effect(KILL rdx, KILL cr); 7635 size(26); 7636 ins_cost(30*100+10*100); 7637 format %{ "CMP EAX,0x80000000\n\t" 7638 "JNE,s normal\n\t" 7639 "XOR EDX,EDX\n\t" 7640 "CMP ECX,-1\n\t" 7641 "JE,s done\n" 7642 "normal: CDQ\n\t" 7643 "IDIV $div\n\t" 7644 "done:" %} 7645 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7646 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7647 ins_pipe( ialu_reg_reg_alu0 ); 7648 %} 7649 7650 // Divide Register Long 7651 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ 7652 match(Set dst (DivL src1 src2)); 7653 effect(CALL); 7654 ins_cost(10000); 7655 format %{ "PUSH $src1.hi\n\t" 7656 "PUSH $src1.lo\n\t" 7657 "PUSH $src2.hi\n\t" 7658 "PUSH $src2.lo\n\t" 7659 "CALL SharedRuntime::ldiv\n\t" 7660 "ADD ESP,16" %} 7661 ins_encode( long_div(src1,src2) ); 7662 ins_pipe( pipe_slow ); 7663 %} 7664 7665 // Integer DIVMOD with Register, both quotient and mod results 7666 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7667 match(DivModI rax div); 7668 effect(KILL cr); 7669 size(26); 7670 ins_cost(30*100+10*100); 7671 format %{ "CMP EAX,0x80000000\n\t" 7672 "JNE,s normal\n\t" 7673 "XOR EDX,EDX\n\t" 7674 "CMP ECX,-1\n\t" 7675 "JE,s done\n" 7676 "normal: CDQ\n\t" 7677 "IDIV $div\n\t" 7678 "done:" %} 7679 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7680 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7681 ins_pipe( pipe_slow ); 7682 %} 7683 7684 // Integer MOD with Register 7685 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ 7686 match(Set rdx (ModI rax div)); 7687 effect(KILL rax, KILL cr); 7688 7689 size(26); 7690 ins_cost(300); 7691 format %{ "CDQ\n\t" 7692 "IDIV $div" %} 7693 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7694 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7695 ins_pipe( ialu_reg_reg_alu0 ); 7696 %} 7697 7698 // Remainder Register Long 7699 
instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ 7700 match(Set dst (ModL src1 src2)); 7701 effect(CALL); 7702 ins_cost(10000); 7703 format %{ "PUSH $src1.hi\n\t" 7704 "PUSH $src1.lo\n\t" 7705 "PUSH $src2.hi\n\t" 7706 "PUSH $src2.lo\n\t" 7707 "CALL SharedRuntime::lrem\n\t" 7708 "ADD ESP,16" %} 7709 ins_encode( long_mod(src1,src2) ); 7710 ins_pipe( pipe_slow ); 7711 %} 7712 7713 // Divide Register Long (no special case since divisor != -1) 7714 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7715 match(Set dst (DivL dst imm)); 7716 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7717 ins_cost(1000); 7718 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" 7719 "XOR $tmp2,$tmp2\n\t" 7720 "CMP $tmp,EDX\n\t" 7721 "JA,s fast\n\t" 7722 "MOV $tmp2,EAX\n\t" 7723 "MOV EAX,EDX\n\t" 7724 "MOV EDX,0\n\t" 7725 "JLE,s pos\n\t" 7726 "LNEG EAX : $tmp2\n\t" 7727 "DIV $tmp # unsigned division\n\t" 7728 "XCHG EAX,$tmp2\n\t" 7729 "DIV $tmp\n\t" 7730 "LNEG $tmp2 : EAX\n\t" 7731 "JMP,s done\n" 7732 "pos:\n\t" 7733 "DIV $tmp\n\t" 7734 "XCHG EAX,$tmp2\n" 7735 "fast:\n\t" 7736 "DIV $tmp\n" 7737 "done:\n\t" 7738 "MOV EDX,$tmp2\n\t" 7739 "NEG EDX:EAX # if $imm < 0" %} 7740 ins_encode %{ 7741 int con = (int)$imm$$constant; 7742 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7743 int pcon = (con > 0) ? con : -con; 7744 Label Lfast, Lpos, Ldone; 7745 7746 __ movl($tmp$$Register, pcon); 7747 __ xorl($tmp2$$Register,$tmp2$$Register); 7748 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7749 __ jccb(Assembler::above, Lfast); // result fits into 32 bit 7750 7751 __ movl($tmp2$$Register, $dst$$Register); // save 7752 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7753 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7754 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7755 7756 // Negative dividend. 
7757 // convert value to positive to use unsigned division 7758 __ lneg($dst$$Register, $tmp2$$Register); 7759 __ divl($tmp$$Register); 7760 __ xchgl($dst$$Register, $tmp2$$Register); 7761 __ divl($tmp$$Register); 7762 // revert result back to negative 7763 __ lneg($tmp2$$Register, $dst$$Register); 7764 __ jmpb(Ldone); 7765 7766 __ bind(Lpos); 7767 __ divl($tmp$$Register); // Use unsigned division 7768 __ xchgl($dst$$Register, $tmp2$$Register); 7769 // Fallthrow for final divide, tmp2 has 32 bit hi result 7770 7771 __ bind(Lfast); 7772 // fast path: src is positive 7773 __ divl($tmp$$Register); // Use unsigned division 7774 7775 __ bind(Ldone); 7776 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7777 if (con < 0) { 7778 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7779 } 7780 %} 7781 ins_pipe( pipe_slow ); 7782 %} 7783 7784 // Remainder Register Long (remainder fit into 32 bits) 7785 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7786 match(Set dst (ModL dst imm)); 7787 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7788 ins_cost(1000); 7789 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7790 "CMP $tmp,EDX\n\t" 7791 "JA,s fast\n\t" 7792 "MOV $tmp2,EAX\n\t" 7793 "MOV EAX,EDX\n\t" 7794 "MOV EDX,0\n\t" 7795 "JLE,s pos\n\t" 7796 "LNEG EAX : $tmp2\n\t" 7797 "DIV $tmp # unsigned division\n\t" 7798 "MOV EAX,$tmp2\n\t" 7799 "DIV $tmp\n\t" 7800 "NEG EDX\n\t" 7801 "JMP,s done\n" 7802 "pos:\n\t" 7803 "DIV $tmp\n\t" 7804 "MOV EAX,$tmp2\n" 7805 "fast:\n\t" 7806 "DIV $tmp\n" 7807 "done:\n\t" 7808 "MOV EAX,EDX\n\t" 7809 "SAR EDX,31\n\t" %} 7810 ins_encode %{ 7811 int con = (int)$imm$$constant; 7812 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7813 int pcon = (con > 0) ? 
con : -con;
  Label Lfast, Lpos, Ldone;

  __ movl($tmp$$Register, pcon);
  __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
  __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

  __ movl($tmp2$$Register, $dst$$Register); // save
  __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
  __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
  __ jccb(Assembler::lessEqual, Lpos); // result is positive

  // Negative dividend.
  // convert value to positive to use unsigned division
  __ lneg($dst$$Register, $tmp2$$Register);
  __ divl($tmp$$Register);
  __ movl($dst$$Register, $tmp2$$Register);
  __ divl($tmp$$Register);
  // revert remainder back to negative
  __ negl(HIGH_FROM_LOW($dst$$Register));
  __ jmpb(Ldone);

  __ bind(Lpos);
  __ divl($tmp$$Register);
  __ movl($dst$$Register, $tmp2$$Register);

  __ bind(Lfast);
  // fast path: src is positive
  __ divl($tmp$$Register);

  __ bind(Ldone);
  __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
  __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// The x86 shift group encodes the operation in the reg field of the ModRM
// byte (the secondary opcode below): D1 /r = shift-by-1, C1 /r ib = shift
// by 8-bit immediate, D3 /r = shift by CL.
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4);  /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
// Variable shift count must be in ECX (x86 shifts by CL only), hence eCXRegI.
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4);  /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one, directly on a memory operand
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary,dst), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  // NOTE(review): pipe class is ialu_mem_imm though this is a reg form;
  // the SHL/SHR imm variants use ialu_reg — confirm whether intentional.
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate, directly on a memory operand
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary, dst ), Con8or32(shift), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable (count in ECX)
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7);  /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5);  /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
// Matched as a single sign-extending byte move.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
// Matched as a single sign-extending word move.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable (count in ECX)
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
// OpcSErm/Con8or32 pick the sign-extended 8-bit form when the immediate fits.
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register (read-modify-write in one instruction)
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2, matched from the ideal form (src1 ^ -1) & src2.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from the ideal form (0 - src) & src.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to lowest set bit, matched from (src + -1) ^ src.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, matched from (src + -1) & src.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or with a pointer reinterpreted as an int (CastP2X)
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register (read-modify-write in one instruction)
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// These carry no match rule; they are building blocks instantiated by the
// expand rules below, which recognize the shift-and-or rotate idioms.
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0);  /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0);  /* Opcode C1 /0 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

// Variable rotate: count must be in ECX, so dst is constrained off ECX.
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0);  /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Only legal when the two shift counts sum to 0 mod 32 (a true rotate).
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
// Match-less building blocks for the rotate-right idioms below.
instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

// Variable rotate: count must be in ECX, so dst is constrained off ECX.
instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1);  /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Only legal when the two shift counts sum to 0 mod 32 (a true rotate).
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// x ^ -1 is bitwise complement; NOT leaves the flags untouched, so no KILL cr.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( SetInstMark, OpcP, RegMem(dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register (read-modify-write in one instruction)
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode(
SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Plain register copy; expand building block for convI2B below.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG dst sets CF iff dst != 0; ADC dst,src then yields 0/1 (dst == src here).
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer variant of the copy building block.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// dst = (p < q) ? -1 : 0, built branch-free from SETcc + NEG.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // Fix: removed unused 'Label done;' — nothing here branches or binds it.
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// Special case: mask of dst against 0 is just an arithmetic shift of the sign.
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These produce only the flags result (OF) consumed by an Overflow branch;
// op1 is USE_KILL because the arithmetic destroys it.

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// CMP sets flags like SUB without writing op1, so no KILL needed here.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 0 - op2 is NEG; overflows only for MIN_INT.
instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int"
%}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Three-operand IMUL into a temp leaves op1 intact.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Integer Absolute Instructions
// Branch-free abs: tmp = sign mask (src >> 31); dst = (src ^ tmp) - tmp.
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
  %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);
    __ subl($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

//----------Long Instructions------------------------------------------------
// 64-bit values live in a register pair; arithmetic is lo-word op then
// hi-word op with carry/borrow propagated (ADD/ADC, SUB/SBB).
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem),
              ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Long forms apply the 32-bit BMI op to lo and hi words independently
// (ANDN) or with explicit cross-word fixups (BLSI/BLSMSK/BLSR below).
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // High word lives 4 bytes past the low word of the long in memory.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI long: isolate lowest set bit of a 64-bit value.
// If the low word yields a non-zero result (JNZ), the isolated bit is in the
// low word and dst.hi stays 0; otherwise isolate within the high word.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK long: mask up to (and including) lowest set bit.
// BLSMSK sets CF iff its source is zero, so JNC (carryClear) means the mask
// terminated in the low word; otherwise extend the mask through the high word.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR long: reset lowest set bit.
// dst.hi starts as a copy of src.hi; BLSR sets CF iff its source is zero, so
// JNC means the cleared bit was in the low word and the high word is kept.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
// 64-bit OR on a 32-bit target: the long lives in a register pair, so the
// immediate is applied as two 32-bit ORs (low half, then high half).
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr); // OR writes EFLAGS
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
  // Long_OpcSErm_* emit the 0x81 /1 form against the low and high halves
  // of the register pair with the matching immediate halves.
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
// Cisc form: OR the register pair directly with a 64-bit memory operand
// ($mem holds the low word, $mem+4 the high word, little-endian layout).
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr); // OR writes EFLAGS
  ins_cost(125);   // memory operand costs more than reg-reg form
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B); /* OR r32, r/m32 for both halves */
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr); // XOR writes EFLAGS
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33); /* XOR r32, r/m32 for both halves */
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// Special case: XOR with all-ones is bitwise complement, so emit NOT on
// each half.  NOT does not update EFLAGS, hence no KILL cr effect here.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register)); // high half of the pair
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr); // XOR writes EFLAGS
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr); // XOR writes EFLAGS
  ins_cost(125);   // memory operand costs more than reg-reg form
  // 64-bit XOR against memory: $mem is the low word, $mem+4 the high word.
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33); /* XOR r32, r/m32 for both halves */
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
// A 64-bit left shift by one is an add-with-carry chain on the pair:
// ADD doubles the low word, ADC doubles the high word and folds in the
// carry out of the low word.  Guarded by -XX:+UseNewLongLShift.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr); // ADD/ADC write EFLAGS
  ins_cost(100);   // cheaper than the generic SHLD sequence below
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
// Two repetitions of the shift-by-1 ADD/ADC pair.
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr); // ADD/ADC write EFLAGS
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
// Three repetitions of the shift-by-1 ADD/ADC pair.
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr); // ADD/ADC write EFLAGS
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
__ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9168 __ addl($dst$$Register,$dst$$Register); 9169 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9170 __ addl($dst$$Register,$dst$$Register); 9171 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9172 %} 9173 ins_pipe( ialu_reg_long ); 9174 %} 9175 9176 // Shift Left Long by 1-31 9177 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9178 match(Set dst (LShiftL dst cnt)); 9179 effect(KILL cr); 9180 ins_cost(200); 9181 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9182 "SHL $dst.lo,$cnt" %} 9183 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9184 ins_encode( move_long_small_shift(dst,cnt) ); 9185 ins_pipe( ialu_reg_long ); 9186 %} 9187 9188 // Shift Left Long by 32-63 9189 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9190 match(Set dst (LShiftL dst cnt)); 9191 effect(KILL cr); 9192 ins_cost(300); 9193 format %{ "MOV $dst.hi,$dst.lo\n" 9194 "\tSHL $dst.hi,$cnt-32\n" 9195 "\tXOR $dst.lo,$dst.lo" %} 9196 opcode(0xC1, 0x4); /* C1 /4 ib */ 9197 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9198 ins_pipe( ialu_reg_long ); 9199 %} 9200 9201 // Shift Left Long by variable 9202 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9203 match(Set dst (LShiftL dst shift)); 9204 effect(KILL cr); 9205 ins_cost(500+200); 9206 size(17); 9207 format %{ "TEST $shift,32\n\t" 9208 "JEQ,s small\n\t" 9209 "MOV $dst.hi,$dst.lo\n\t" 9210 "XOR $dst.lo,$dst.lo\n" 9211 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9212 "SHL $dst.lo,$shift" %} 9213 ins_encode( shift_left_long( dst, shift ) ); 9214 ins_pipe( pipe_slow ); 9215 %} 9216 9217 // Shift Right Long by 1-31 9218 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9219 match(Set dst (URShiftL dst cnt)); 9220 effect(KILL cr); 9221 ins_cost(200); 9222 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9223 "SHR $dst.hi,$cnt" %} 9224 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, 
then C1 /5 ib */ 9225 ins_encode( move_long_small_shift(dst,cnt) ); 9226 ins_pipe( ialu_reg_long ); 9227 %} 9228 9229 // Shift Right Long by 32-63 9230 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9231 match(Set dst (URShiftL dst cnt)); 9232 effect(KILL cr); 9233 ins_cost(300); 9234 format %{ "MOV $dst.lo,$dst.hi\n" 9235 "\tSHR $dst.lo,$cnt-32\n" 9236 "\tXOR $dst.hi,$dst.hi" %} 9237 opcode(0xC1, 0x5); /* C1 /5 ib */ 9238 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9239 ins_pipe( ialu_reg_long ); 9240 %} 9241 9242 // Shift Right Long by variable 9243 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9244 match(Set dst (URShiftL dst shift)); 9245 effect(KILL cr); 9246 ins_cost(600); 9247 size(17); 9248 format %{ "TEST $shift,32\n\t" 9249 "JEQ,s small\n\t" 9250 "MOV $dst.lo,$dst.hi\n\t" 9251 "XOR $dst.hi,$dst.hi\n" 9252 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9253 "SHR $dst.hi,$shift" %} 9254 ins_encode( shift_right_long( dst, shift ) ); 9255 ins_pipe( pipe_slow ); 9256 %} 9257 9258 // Shift Right Long by 1-31 9259 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9260 match(Set dst (RShiftL dst cnt)); 9261 effect(KILL cr); 9262 ins_cost(200); 9263 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9264 "SAR $dst.hi,$cnt" %} 9265 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9266 ins_encode( move_long_small_shift(dst,cnt) ); 9267 ins_pipe( ialu_reg_long ); 9268 %} 9269 9270 // Shift Right Long by 32-63 9271 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9272 match(Set dst (RShiftL dst cnt)); 9273 effect(KILL cr); 9274 ins_cost(300); 9275 format %{ "MOV $dst.lo,$dst.hi\n" 9276 "\tSAR $dst.lo,$cnt-32\n" 9277 "\tSAR $dst.hi,31" %} 9278 opcode(0xC1, 0x7); /* C1 /7 ib */ 9279 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9280 ins_pipe( ialu_reg_long ); 9281 %} 9282 9283 // Shift Right arithmetic Long by variable 9284 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9285 
match(Set dst (RShiftL dst shift)); 9286 effect(KILL cr); 9287 ins_cost(600); 9288 size(18); 9289 format %{ "TEST $shift,32\n\t" 9290 "JEQ,s small\n\t" 9291 "MOV $dst.lo,$dst.hi\n\t" 9292 "SAR $dst.hi,31\n" 9293 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9294 "SAR $dst.hi,$shift" %} 9295 ins_encode( shift_right_arith_long( dst, shift ) ); 9296 ins_pipe( pipe_slow ); 9297 %} 9298 9299 9300 //----------Double Instructions------------------------------------------------ 9301 // Double Math 9302 9303 // Compare & branch 9304 9305 // P6 version of float compare, sets condition codes in EFLAGS 9306 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9307 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9308 match(Set cr (CmpD src1 src2)); 9309 effect(KILL rax); 9310 ins_cost(150); 9311 format %{ "FLD $src1\n\t" 9312 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9313 "JNP exit\n\t" 9314 "MOV ah,1 // saw a NaN, set CF\n\t" 9315 "SAHF\n" 9316 "exit:\tNOP // avoid branch to branch" %} 9317 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9318 ins_encode( Push_Reg_DPR(src1), 9319 OpcP, RegOpc(src2), 9320 cmpF_P6_fixup ); 9321 ins_pipe( pipe_slow ); 9322 %} 9323 9324 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9325 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9326 match(Set cr (CmpD src1 src2)); 9327 ins_cost(150); 9328 format %{ "FLD $src1\n\t" 9329 "FUCOMIP ST,$src2 // P6 instruction" %} 9330 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9331 ins_encode( Push_Reg_DPR(src1), 9332 OpcP, RegOpc(src2)); 9333 ins_pipe( pipe_slow ); 9334 %} 9335 9336 // Compare & branch 9337 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9338 predicate(UseSSE<=1); 9339 match(Set cr (CmpD src1 src2)); 9340 effect(KILL rax); 9341 ins_cost(200); 9342 format %{ "FLD $src1\n\t" 9343 "FCOMp $src2\n\t" 9344 "FNSTSW AX\n\t" 9345 "TEST AX,0x400\n\t" 9346 "JZ,s flags\n\t" 9347 "MOV AH,1\t# unordered treat as LT\n" 9348 
"flags:\tSAHF" %} 9349 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9350 ins_encode( Push_Reg_DPR(src1), 9351 OpcP, RegOpc(src2), 9352 fpu_flags); 9353 ins_pipe( pipe_slow ); 9354 %} 9355 9356 // Compare vs zero into -1,0,1 9357 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9358 predicate(UseSSE<=1); 9359 match(Set dst (CmpD3 src1 zero)); 9360 effect(KILL cr, KILL rax); 9361 ins_cost(280); 9362 format %{ "FTSTD $dst,$src1" %} 9363 opcode(0xE4, 0xD9); 9364 ins_encode( Push_Reg_DPR(src1), 9365 OpcS, OpcP, PopFPU, 9366 CmpF_Result(dst)); 9367 ins_pipe( pipe_slow ); 9368 %} 9369 9370 // Compare into -1,0,1 9371 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9372 predicate(UseSSE<=1); 9373 match(Set dst (CmpD3 src1 src2)); 9374 effect(KILL cr, KILL rax); 9375 ins_cost(300); 9376 format %{ "FCMPD $dst,$src1,$src2" %} 9377 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9378 ins_encode( Push_Reg_DPR(src1), 9379 OpcP, RegOpc(src2), 9380 CmpF_Result(dst)); 9381 ins_pipe( pipe_slow ); 9382 %} 9383 9384 // float compare and set condition codes in EFLAGS by XMM regs 9385 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9386 predicate(UseSSE>=2); 9387 match(Set cr (CmpD src1 src2)); 9388 ins_cost(145); 9389 format %{ "UCOMISD $src1,$src2\n\t" 9390 "JNP,s exit\n\t" 9391 "PUSHF\t# saw NaN, set CF\n\t" 9392 "AND [rsp], #0xffffff2b\n\t" 9393 "POPF\n" 9394 "exit:" %} 9395 ins_encode %{ 9396 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9397 emit_cmpfp_fixup(masm); 9398 %} 9399 ins_pipe( pipe_slow ); 9400 %} 9401 9402 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9403 predicate(UseSSE>=2); 9404 match(Set cr (CmpD src1 src2)); 9405 ins_cost(100); 9406 format %{ "UCOMISD $src1,$src2" %} 9407 ins_encode %{ 9408 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9409 %} 9410 ins_pipe( pipe_slow ); 9411 %} 9412 9413 // float compare and set condition codes in EFLAGS by XMM regs 9414 
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9415 predicate(UseSSE>=2); 9416 match(Set cr (CmpD src1 (LoadD src2))); 9417 ins_cost(145); 9418 format %{ "UCOMISD $src1,$src2\n\t" 9419 "JNP,s exit\n\t" 9420 "PUSHF\t# saw NaN, set CF\n\t" 9421 "AND [rsp], #0xffffff2b\n\t" 9422 "POPF\n" 9423 "exit:" %} 9424 ins_encode %{ 9425 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9426 emit_cmpfp_fixup(masm); 9427 %} 9428 ins_pipe( pipe_slow ); 9429 %} 9430 9431 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9432 predicate(UseSSE>=2); 9433 match(Set cr (CmpD src1 (LoadD src2))); 9434 ins_cost(100); 9435 format %{ "UCOMISD $src1,$src2" %} 9436 ins_encode %{ 9437 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9438 %} 9439 ins_pipe( pipe_slow ); 9440 %} 9441 9442 // Compare into -1,0,1 in XMM 9443 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9444 predicate(UseSSE>=2); 9445 match(Set dst (CmpD3 src1 src2)); 9446 effect(KILL cr); 9447 ins_cost(255); 9448 format %{ "UCOMISD $src1, $src2\n\t" 9449 "MOV $dst, #-1\n\t" 9450 "JP,s done\n\t" 9451 "JB,s done\n\t" 9452 "SETNE $dst\n\t" 9453 "MOVZB $dst, $dst\n" 9454 "done:" %} 9455 ins_encode %{ 9456 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9457 emit_cmpfp3(masm, $dst$$Register); 9458 %} 9459 ins_pipe( pipe_slow ); 9460 %} 9461 9462 // Compare into -1,0,1 in XMM and memory 9463 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9464 predicate(UseSSE>=2); 9465 match(Set dst (CmpD3 src1 (LoadD src2))); 9466 effect(KILL cr); 9467 ins_cost(275); 9468 format %{ "UCOMISD $src1, $src2\n\t" 9469 "MOV $dst, #-1\n\t" 9470 "JP,s done\n\t" 9471 "JB,s done\n\t" 9472 "SETNE $dst\n\t" 9473 "MOVZB $dst, $dst\n" 9474 "done:" %} 9475 ins_encode %{ 9476 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9477 emit_cmpfp3(masm, $dst$$Register); 9478 %} 9479 ins_pipe( pipe_slow ); 9480 %} 9481 9482 9483 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9484 predicate (UseSSE 
<=1); 9485 match(Set dst (SubD dst src)); 9486 9487 format %{ "FLD $src\n\t" 9488 "DSUBp $dst,ST" %} 9489 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9490 ins_cost(150); 9491 ins_encode( Push_Reg_DPR(src), 9492 OpcP, RegOpc(dst) ); 9493 ins_pipe( fpu_reg_reg ); 9494 %} 9495 9496 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9497 predicate (UseSSE <=1); 9498 match(Set dst (RoundDouble (SubD src1 src2))); 9499 ins_cost(250); 9500 9501 format %{ "FLD $src2\n\t" 9502 "DSUB ST,$src1\n\t" 9503 "FSTP_D $dst\t# D-round" %} 9504 opcode(0xD8, 0x5); 9505 ins_encode( Push_Reg_DPR(src2), 9506 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9507 ins_pipe( fpu_mem_reg_reg ); 9508 %} 9509 9510 9511 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9512 predicate (UseSSE <=1); 9513 match(Set dst (SubD dst (LoadD src))); 9514 ins_cost(150); 9515 9516 format %{ "FLD $src\n\t" 9517 "DSUBp $dst,ST" %} 9518 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9519 ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src), 9520 OpcP, RegOpc(dst), ClearInstMark ); 9521 ins_pipe( fpu_reg_mem ); 9522 %} 9523 9524 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9525 predicate (UseSSE<=1); 9526 match(Set dst (AbsD src)); 9527 ins_cost(100); 9528 format %{ "FABS" %} 9529 opcode(0xE1, 0xD9); 9530 ins_encode( OpcS, OpcP ); 9531 ins_pipe( fpu_reg_reg ); 9532 %} 9533 9534 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9535 predicate(UseSSE<=1); 9536 match(Set dst (NegD src)); 9537 ins_cost(100); 9538 format %{ "FCHS" %} 9539 opcode(0xE0, 0xD9); 9540 ins_encode( OpcS, OpcP ); 9541 ins_pipe( fpu_reg_reg ); 9542 %} 9543 9544 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9545 predicate(UseSSE<=1); 9546 match(Set dst (AddD dst src)); 9547 format %{ "FLD $src\n\t" 9548 "DADD $dst,ST" %} 9549 size(4); 9550 ins_cost(150); 9551 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9552 ins_encode( Push_Reg_DPR(src), 9553 OpcP, RegOpc(dst) ); 9554 ins_pipe( fpu_reg_reg ); 9555 %} 9556 9557 
9558 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9559 predicate(UseSSE<=1); 9560 match(Set dst (RoundDouble (AddD src1 src2))); 9561 ins_cost(250); 9562 9563 format %{ "FLD $src2\n\t" 9564 "DADD ST,$src1\n\t" 9565 "FSTP_D $dst\t# D-round" %} 9566 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9567 ins_encode( Push_Reg_DPR(src2), 9568 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9569 ins_pipe( fpu_mem_reg_reg ); 9570 %} 9571 9572 9573 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9574 predicate(UseSSE<=1); 9575 match(Set dst (AddD dst (LoadD src))); 9576 ins_cost(150); 9577 9578 format %{ "FLD $src\n\t" 9579 "DADDp $dst,ST" %} 9580 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9581 ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src), 9582 OpcP, RegOpc(dst), ClearInstMark ); 9583 ins_pipe( fpu_reg_mem ); 9584 %} 9585 9586 // add-to-memory 9587 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9588 predicate(UseSSE<=1); 9589 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9590 ins_cost(150); 9591 9592 format %{ "FLD_D $dst\n\t" 9593 "DADD ST,$src\n\t" 9594 "FST_D $dst" %} 9595 opcode(0xDD, 0x0); 9596 ins_encode( SetInstMark, Opcode(0xDD), RMopc_Mem(0x00,dst), 9597 Opcode(0xD8), RegOpc(src), ClearInstMark, 9598 SetInstMark, 9599 Opcode(0xDD), RMopc_Mem(0x03,dst), 9600 ClearInstMark); 9601 ins_pipe( fpu_reg_mem ); 9602 %} 9603 9604 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9605 predicate(UseSSE<=1); 9606 match(Set dst (AddD dst con)); 9607 ins_cost(125); 9608 format %{ "FLD1\n\t" 9609 "DADDp $dst,ST" %} 9610 ins_encode %{ 9611 __ fld1(); 9612 __ faddp($dst$$reg); 9613 %} 9614 ins_pipe(fpu_reg); 9615 %} 9616 9617 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9618 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9619 match(Set dst (AddD dst con)); 9620 ins_cost(200); 9621 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 
9622 "DADDp $dst,ST" %} 9623 ins_encode %{ 9624 __ fld_d($constantaddress($con)); 9625 __ faddp($dst$$reg); 9626 %} 9627 ins_pipe(fpu_reg_mem); 9628 %} 9629 9630 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9631 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9632 match(Set dst (RoundDouble (AddD src con))); 9633 ins_cost(200); 9634 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9635 "DADD ST,$src\n\t" 9636 "FSTP_D $dst\t# D-round" %} 9637 ins_encode %{ 9638 __ fld_d($constantaddress($con)); 9639 __ fadd($src$$reg); 9640 __ fstp_d(Address(rsp, $dst$$disp)); 9641 %} 9642 ins_pipe(fpu_mem_reg_con); 9643 %} 9644 9645 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9646 predicate(UseSSE<=1); 9647 match(Set dst (MulD dst src)); 9648 format %{ "FLD $src\n\t" 9649 "DMULp $dst,ST" %} 9650 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9651 ins_cost(150); 9652 ins_encode( Push_Reg_DPR(src), 9653 OpcP, RegOpc(dst) ); 9654 ins_pipe( fpu_reg_reg ); 9655 %} 9656 9657 // Strict FP instruction biases argument before multiply then 9658 // biases result to avoid double rounding of subnormals. 
9659 // 9660 // scale arg1 by multiplying arg1 by 2^(-15360) 9661 // load arg2 9662 // multiply scaled arg1 by arg2 9663 // rescale product by 2^(15360) 9664 // 9665 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9666 predicate( UseSSE<=1 && Compile::current()->has_method() ); 9667 match(Set dst (MulD dst src)); 9668 ins_cost(1); // Select this instruction for all FP double multiplies 9669 9670 format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t" 9671 "DMULp $dst,ST\n\t" 9672 "FLD $src\n\t" 9673 "DMULp $dst,ST\n\t" 9674 "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t" 9675 "DMULp $dst,ST\n\t" %} 9676 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9677 ins_encode( strictfp_bias1(dst), 9678 Push_Reg_DPR(src), 9679 OpcP, RegOpc(dst), 9680 strictfp_bias2(dst) ); 9681 ins_pipe( fpu_reg_reg ); 9682 %} 9683 9684 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9685 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9686 match(Set dst (MulD dst con)); 9687 ins_cost(200); 9688 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9689 "DMULp $dst,ST" %} 9690 ins_encode %{ 9691 __ fld_d($constantaddress($con)); 9692 __ fmulp($dst$$reg); 9693 %} 9694 ins_pipe(fpu_reg_mem); 9695 %} 9696 9697 9698 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9699 predicate( UseSSE<=1 ); 9700 match(Set dst (MulD dst (LoadD src))); 9701 ins_cost(200); 9702 format %{ "FLD_D $src\n\t" 9703 "DMULp $dst,ST" %} 9704 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9705 ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src), 9706 OpcP, RegOpc(dst), ClearInstMark ); 9707 ins_pipe( fpu_reg_mem ); 9708 %} 9709 9710 // 9711 // Cisc-alternate to reg-reg multiply 9712 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ 9713 predicate( UseSSE<=1 ); 9714 match(Set dst (MulD src (LoadD mem))); 9715 ins_cost(250); 9716 format %{ "FLD_D $mem\n\t" 9717 "DMUL ST,$src\n\t" 9718 "FSTP_D 
$dst" %} 9719 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9720 ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem), 9721 OpcReg_FPR(src), 9722 Pop_Reg_DPR(dst), ClearInstMark ); 9723 ins_pipe( fpu_reg_reg_mem ); 9724 %} 9725 9726 9727 // MACRO3 -- addDPR a mulDPR 9728 // This instruction is a '2-address' instruction in that the result goes 9729 // back to src2. This eliminates a move from the macro; possibly the 9730 // register allocator will have to add it back (and maybe not). 9731 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9732 predicate( UseSSE<=1 ); 9733 match(Set src2 (AddD (MulD src0 src1) src2)); 9734 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9735 "DMUL ST,$src1\n\t" 9736 "DADDp $src2,ST" %} 9737 ins_cost(250); 9738 opcode(0xDD); /* LoadD DD /0 */ 9739 ins_encode( Push_Reg_FPR(src0), 9740 FMul_ST_reg(src1), 9741 FAddP_reg_ST(src2) ); 9742 ins_pipe( fpu_reg_reg_reg ); 9743 %} 9744 9745 9746 // MACRO3 -- subDPR a mulDPR 9747 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9748 predicate( UseSSE<=1 ); 9749 match(Set src2 (SubD (MulD src0 src1) src2)); 9750 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9751 "DMUL ST,$src1\n\t" 9752 "DSUBRp $src2,ST" %} 9753 ins_cost(250); 9754 ins_encode( Push_Reg_FPR(src0), 9755 FMul_ST_reg(src1), 9756 Opcode(0xDE), Opc_plus(0xE0,src2)); 9757 ins_pipe( fpu_reg_reg_reg ); 9758 %} 9759 9760 9761 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9762 predicate( UseSSE<=1 ); 9763 match(Set dst (DivD dst src)); 9764 9765 format %{ "FLD $src\n\t" 9766 "FDIVp $dst,ST" %} 9767 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9768 ins_cost(150); 9769 ins_encode( Push_Reg_DPR(src), 9770 OpcP, RegOpc(dst) ); 9771 ins_pipe( fpu_reg_reg ); 9772 %} 9773 9774 // Strict FP instruction biases argument before division then 9775 // biases result, to avoid double rounding of subnormals. 
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
// Strict-FP version of divDPR_reg: pre-scales the dividend and re-scales
// the quotient so subnormal results are not double-rounded by the x87
// extended-precision divide.  Structured to mirror strictfp_mulDPR_reg.
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // Fix: this block carried a second, stray "predicate (UseSSE<=1);"
  // before match().  An instruct takes a single predicate, and the
  // combined condition below subsumes the stray one (it is exactly the
  // form strictfp_mulDPR_reg uses), so only it is kept.
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(01); // select this instruction for all FP double divides

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),   // scale dividend by 2^(-15360)
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),     // FDIVp
              strictfp_bias2(dst) ); // rescale quotient by 2^(15360)
  ins_pipe( fpu_reg_reg );
%}

// Double modulus on the x87 stack; emitModDPR() loops FPREM until the
// partial-remainder reduction completes, using EAX/EFLAGS for the status.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Double modulus for SSE2: operands are spilled to the stack so the x87
// FPREM loop can compute the remainder, then the result is moved back
// into the XMM destination and the FPU stack is restored.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr); // FNSTSW/SAHF status shuffle uses EAX/EFLAGS

  format %{ "SUB ESP,8\t # DMOD\n"
          "\tMOVSD [ESP+0],$src1\n"
          "\tFLD_D [ESP+0]\n"
          "\tMOVSD [ESP+0],$src0\n"
          "\tFLD_D [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD $dst,[ESP+0]\n"
          "\tADD ESP,8\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

instruct atanDPR_reg(regDPR dst,
regDPR src) %{ 9842 predicate (UseSSE<=1); 9843 match(Set dst(AtanD dst src)); 9844 format %{ "DATA $dst,$src" %} 9845 opcode(0xD9, 0xF3); 9846 ins_encode( Push_Reg_DPR(src), 9847 OpcP, OpcS, RegOpc(dst) ); 9848 ins_pipe( pipe_slow ); 9849 %} 9850 9851 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9852 predicate (UseSSE>=2); 9853 match(Set dst(AtanD dst src)); 9854 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9855 format %{ "DATA $dst,$src" %} 9856 opcode(0xD9, 0xF3); 9857 ins_encode( Push_SrcD(src), 9858 OpcP, OpcS, Push_ResultD(dst) ); 9859 ins_pipe( pipe_slow ); 9860 %} 9861 9862 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9863 predicate (UseSSE<=1); 9864 match(Set dst (SqrtD src)); 9865 format %{ "DSQRT $dst,$src" %} 9866 opcode(0xFA, 0xD9); 9867 ins_encode( Push_Reg_DPR(src), 9868 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9869 ins_pipe( pipe_slow ); 9870 %} 9871 9872 //-------------Float Instructions------------------------------- 9873 // Float Math 9874 9875 // Code for float compare: 9876 // fcompp(); 9877 // fwait(); fnstsw_ax(); 9878 // sahf(); 9879 // movl(dst, unordered_result); 9880 // jcc(Assembler::parity, exit); 9881 // movl(dst, less_result); 9882 // jcc(Assembler::below, exit); 9883 // movl(dst, equal_result); 9884 // jcc(Assembler::equal, exit); 9885 // movl(dst, greater_result); 9886 // exit: 9887 9888 // P6 version of float compare, sets condition codes in EFLAGS 9889 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9890 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9891 match(Set cr (CmpF src1 src2)); 9892 effect(KILL rax); 9893 ins_cost(150); 9894 format %{ "FLD $src1\n\t" 9895 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9896 "JNP exit\n\t" 9897 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 9898 "SAHF\n" 9899 "exit:\tNOP // avoid branch to branch" %} 9900 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9901 ins_encode( Push_Reg_DPR(src1), 9902 OpcP, RegOpc(src2), 9903 cmpF_P6_fixup ); 
9904 ins_pipe( pipe_slow ); 9905 %} 9906 9907 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 9908 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9909 match(Set cr (CmpF src1 src2)); 9910 ins_cost(100); 9911 format %{ "FLD $src1\n\t" 9912 "FUCOMIP ST,$src2 // P6 instruction" %} 9913 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9914 ins_encode( Push_Reg_DPR(src1), 9915 OpcP, RegOpc(src2)); 9916 ins_pipe( pipe_slow ); 9917 %} 9918 9919 9920 // Compare & branch 9921 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9922 predicate(UseSSE == 0); 9923 match(Set cr (CmpF src1 src2)); 9924 effect(KILL rax); 9925 ins_cost(200); 9926 format %{ "FLD $src1\n\t" 9927 "FCOMp $src2\n\t" 9928 "FNSTSW AX\n\t" 9929 "TEST AX,0x400\n\t" 9930 "JZ,s flags\n\t" 9931 "MOV AH,1\t# unordered treat as LT\n" 9932 "flags:\tSAHF" %} 9933 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9934 ins_encode( Push_Reg_DPR(src1), 9935 OpcP, RegOpc(src2), 9936 fpu_flags); 9937 ins_pipe( pipe_slow ); 9938 %} 9939 9940 // Compare vs zero into -1,0,1 9941 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9942 predicate(UseSSE == 0); 9943 match(Set dst (CmpF3 src1 zero)); 9944 effect(KILL cr, KILL rax); 9945 ins_cost(280); 9946 format %{ "FTSTF $dst,$src1" %} 9947 opcode(0xE4, 0xD9); 9948 ins_encode( Push_Reg_DPR(src1), 9949 OpcS, OpcP, PopFPU, 9950 CmpF_Result(dst)); 9951 ins_pipe( pipe_slow ); 9952 %} 9953 9954 // Compare into -1,0,1 9955 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 9956 predicate(UseSSE == 0); 9957 match(Set dst (CmpF3 src1 src2)); 9958 effect(KILL cr, KILL rax); 9959 ins_cost(300); 9960 format %{ "FCMPF $dst,$src1,$src2" %} 9961 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9962 ins_encode( Push_Reg_DPR(src1), 9963 OpcP, RegOpc(src2), 9964 CmpF_Result(dst)); 9965 ins_pipe( pipe_slow ); 9966 %} 9967 9968 // float compare and set condition codes in EFLAGS by XMM regs 
// ======================================================================
// Float compare (SSE) and x87 float add/sub/mul patterns.
// NOTE(review): layout restored from a whitespace-mangled source; all
// code tokens are unchanged, only formatting and comments were added.
// ======================================================================

// Compare two XMM floats and set the unordered-tolerant flags register.
// The post-compare fixup (see format text) makes the NaN/parity case
// read as CF=1 ("saw NaN, set CF") so unordered compares branch safely.
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe( pipe_slow );
%}

// Cheaper compare when the consumer uses eFlagsRegUCF (carry-based
// flag tests): no NaN fixup sequence is emitted.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand variant of cmpF_cc (folds the LoadF into UCOMISS).
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of cmpF_ccCF (no NaN fixup).
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
// Three-way compare (CmpF3): materializes -1/0/1 in an int register.
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ---- x87 (UseSSE==0) float arithmetic. The *24* variants spill the
// ---- result to a stack slot to force rounding to 24-bit precision.

// Spill to obtain 24-bit precision
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// x87 absolute value: operates on the top-of-stack register (regFPR1).
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// x87 negate: flips the sign of the top-of-stack register.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark);
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark);
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder: round-trips the XMM operands through the x87
// stack because FPREM has no SSE equivalent (see format text loop).
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!

// Store an x87 float to a stack slot, forcing a round to single precision.
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Store an x87 double to a stack slot, forcing a round to double precision.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // Copy via the stack: non-TOS sources must be loaded first.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// x87 float->double: widening is exact, a register-to-register copy.
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// XMM float -> x87 double, bounced through a 4-byte stack slot.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    // 0x80000000 is the hardware's "invalid" result: overflow or NaN.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 double -> long; slow path calls d2l_wrapper on overflow/NaN.
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // EDX:EAX == 0x80000000:00000000 signals overflow/NaN -> slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
10763 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ 10764 predicate(UseSSE>=1); 10765 match(Set dst (ConvF2I src)); 10766 effect( KILL tmp, KILL cr ); 10767 format %{ "CVTTSS2SI $dst, $src\n\t" 10768 "CMP $dst,0x80000000\n\t" 10769 "JNE,s fast\n\t" 10770 "SUB ESP, 4\n\t" 10771 "MOVSS [ESP], $src\n\t" 10772 "FLD [ESP]\n\t" 10773 "ADD ESP, 4\n\t" 10774 "CALL d2i_wrapper\n" 10775 "fast:" %} 10776 ins_encode %{ 10777 Label fast; 10778 __ cvttss2sil($dst$$Register, $src$$XMMRegister); 10779 __ cmpl($dst$$Register, 0x80000000); 10780 __ jccb(Assembler::notEqual, fast); 10781 __ subptr(rsp, 4); 10782 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10783 __ fld_s(Address(rsp, 0)); 10784 __ addptr(rsp, 4); 10785 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper()))); 10786 __ post_call_nop(); 10787 __ bind(fast); 10788 %} 10789 ins_pipe( pipe_slow ); 10790 %} 10791 10792 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ 10793 predicate(UseSSE==0); 10794 match(Set dst (ConvF2L src)); 10795 effect( KILL cr ); 10796 format %{ "FLD $src\t# Convert float to long\n\t" 10797 "FLDCW trunc mode\n\t" 10798 "SUB ESP,8\n\t" 10799 "FISTp [ESP + #0]\n\t" 10800 "FLDCW std/24-bit mode\n\t" 10801 "POP EAX\n\t" 10802 "POP EDX\n\t" 10803 "CMP EDX,0x80000000\n\t" 10804 "JNE,s fast\n\t" 10805 "TEST EAX,EAX\n\t" 10806 "JNE,s fast\n\t" 10807 "FLD $src\n\t" 10808 "CALL d2l_wrapper\n" 10809 "fast:" %} 10810 // DPR2L_encoding works for FPR2L 10811 ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); 10812 ins_pipe( pipe_slow ); 10813 %} 10814 10815 // XMM lacks a float/double->long conversion, so use the old FPU stack. 
// XMM float -> long via the x87 stack (FISTP); 0x80000000:00000000
// signals overflow/NaN and diverts to the d2l_wrapper stub.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// int (spilled to stack) -> x87 double via FILD.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// int -> XMM double via CVTSI2SD (scalar conversion path).
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant: folds the LoadI into the conversion.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// int -> XMM double via MOVD + packed CVTDQ2PD (UseXmmI2D path).
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// int loaded from memory -> x87 double (FILD directly from memory).
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// i2f when the input is known to be a masked byte (AndI with 255):
// the value fits exactly in a float, so no rounding store is needed.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst), ClearInstMark);
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F ));
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// int -> XMM float via MOVD + packed CVTDQ2PS (UseXmmI2F path).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy to both halves, arithmetic-shift the
// high half to replicate the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// long -> x87 double: push both halves and FILD the 64-bit integer.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// long -> XMM double: FILD on the stack, round-trip back through MOVSD.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// long -> XMM float: as above but stores single precision.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// long -> x87 float (no UseSSE predicate: fallback pattern).
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// long -> int: just take the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Reinterpret float bits on the stack as an int (no conversion).
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// Reinterpret x87 float as int bits by storing it to a stack slot.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Reinterpret XMM float as int bits via a stack store.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret XMM float as int bits register-to-register (MOVD).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): definition continues past this chunk of the file.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src
); 11161 11162 ins_cost(100); 11163 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} 11164 ins_encode %{ 11165 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11166 %} 11167 ins_pipe( ialu_mem_reg ); 11168 %} 11169 11170 11171 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11172 predicate(UseSSE==0); 11173 match(Set dst (MoveI2F src)); 11174 effect(DEF dst, USE src); 11175 11176 ins_cost(125); 11177 format %{ "FLD_S $src\n\t" 11178 "FSTP $dst\t# MoveI2F_stack_reg" %} 11179 opcode(0xD9); /* D9 /0, FLD m32real */ 11180 ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src), 11181 Pop_Reg_FPR(dst), ClearInstMark ); 11182 ins_pipe( fpu_reg_mem ); 11183 %} 11184 11185 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11186 predicate(UseSSE>=1); 11187 match(Set dst (MoveI2F src)); 11188 effect( DEF dst, USE src ); 11189 11190 ins_cost(95); 11191 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11192 ins_encode %{ 11193 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11194 %} 11195 ins_pipe( pipe_slow ); 11196 %} 11197 11198 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11199 predicate(UseSSE>=2); 11200 match(Set dst (MoveI2F src)); 11201 effect( DEF dst, USE src ); 11202 11203 ins_cost(85); 11204 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11205 ins_encode %{ 11206 __ movdl($dst$$XMMRegister, $src$$Register); 11207 %} 11208 ins_pipe( pipe_slow ); 11209 %} 11210 11211 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11212 match(Set dst (MoveD2L src)); 11213 effect(DEF dst, USE src); 11214 11215 ins_cost(250); 11216 format %{ "MOV $dst.lo,$src\n\t" 11217 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11218 opcode(0x8B, 0x8B); 11219 ins_encode( SetInstMark, OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src), ClearInstMark); 11220 ins_pipe( ialu_mem_long_reg ); 11221 %} 11222 11223 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11224 predicate(UseSSE<=1); 11225 match(Set dst (MoveD2L src)); 11226 
effect(DEF dst, USE src); 11227 11228 ins_cost(125); 11229 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11230 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11231 ins_pipe( fpu_mem_reg ); 11232 %} 11233 11234 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11235 predicate(UseSSE>=2); 11236 match(Set dst (MoveD2L src)); 11237 effect(DEF dst, USE src); 11238 ins_cost(95); 11239 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11240 ins_encode %{ 11241 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11242 %} 11243 ins_pipe( pipe_slow ); 11244 %} 11245 11246 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11247 predicate(UseSSE>=2); 11248 match(Set dst (MoveD2L src)); 11249 effect(DEF dst, USE src, TEMP tmp); 11250 ins_cost(85); 11251 format %{ "MOVD $dst.lo,$src\n\t" 11252 "PSHUFLW $tmp,$src,0x4E\n\t" 11253 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11254 ins_encode %{ 11255 __ movdl($dst$$Register, $src$$XMMRegister); 11256 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11257 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11258 %} 11259 ins_pipe( pipe_slow ); 11260 %} 11261 11262 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11263 match(Set dst (MoveL2D src)); 11264 effect(DEF dst, USE src); 11265 11266 ins_cost(200); 11267 format %{ "MOV $dst,$src.lo\n\t" 11268 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11269 opcode(0x89, 0x89); 11270 ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark ); 11271 ins_pipe( ialu_mem_long_reg ); 11272 %} 11273 11274 11275 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11276 predicate(UseSSE<=1); 11277 match(Set dst (MoveL2D src)); 11278 effect(DEF dst, USE src); 11279 ins_cost(125); 11280 11281 format %{ "FLD_D $src\n\t" 11282 "FSTP $dst\t# MoveL2D_stack_reg" %} 11283 opcode(0xDD); /* DD /0, FLD m64real */ 11284 ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src), 11285 Pop_Reg_DPR(dst), 
ClearInstMark ); 11286 ins_pipe( fpu_reg_mem ); 11287 %} 11288 11289 11290 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11291 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 11292 match(Set dst (MoveL2D src)); 11293 effect(DEF dst, USE src); 11294 11295 ins_cost(95); 11296 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11297 ins_encode %{ 11298 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11299 %} 11300 ins_pipe( pipe_slow ); 11301 %} 11302 11303 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11304 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11305 match(Set dst (MoveL2D src)); 11306 effect(DEF dst, USE src); 11307 11308 ins_cost(95); 11309 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11310 ins_encode %{ 11311 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11312 %} 11313 ins_pipe( pipe_slow ); 11314 %} 11315 11316 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11317 predicate(UseSSE>=2); 11318 match(Set dst (MoveL2D src)); 11319 effect(TEMP dst, USE src, TEMP tmp); 11320 ins_cost(85); 11321 format %{ "MOVD $dst,$src.lo\n\t" 11322 "MOVD $tmp,$src.hi\n\t" 11323 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11324 ins_encode %{ 11325 __ movdl($dst$$XMMRegister, $src$$Register); 11326 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11327 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11328 %} 11329 ins_pipe( pipe_slow ); 11330 %} 11331 11332 //----------------------------- CompressBits/ExpandBits ------------------------ 11333 11334 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ 11335 predicate(n->bottom_type()->isa_long()); 11336 match(Set dst (CompressBits src mask)); 11337 effect(TEMP rtmp, TEMP xtmp, KILL cr); 11338 format %{ "compress_bits $dst, $src, $mask\t! 
using $rtmp and $xtmp as TEMP" %} 11339 ins_encode %{ 11340 Label exit, partail_result; 11341 // Parallely extract both upper and lower 32 bits of source into destination register pair. 11342 // Merge the results of upper and lower destination registers such that upper destination 11343 // results are contiguously laid out after the lower destination result. 11344 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 11345 __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); 11346 __ popcntl($rtmp$$Register, $mask$$Register); 11347 // Skip merging if bit count of lower mask register is equal to 32 (register size). 11348 __ cmpl($rtmp$$Register, 32); 11349 __ jccb(Assembler::equal, exit); 11350 // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. 11351 __ movdl($xtmp$$XMMRegister, $rtmp$$Register); 11352 // Shift left the contents of upper destination register by true bit count of lower mask register 11353 // and merge with lower destination register. 11354 __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11355 __ orl($dst$$Register, $rtmp$$Register); 11356 __ movdl($rtmp$$Register, $xtmp$$XMMRegister); 11357 // Zero out upper destination register if true bit count of lower 32 bit mask is zero 11358 // since contents of upper destination have already been copied to lower destination 11359 // register. 11360 __ cmpl($rtmp$$Register, 0); 11361 __ jccb(Assembler::greater, partail_result); 11362 __ movl(HIGH_FROM_LOW($dst$$Register), 0); 11363 __ jmp(exit); 11364 __ bind(partail_result); 11365 // Perform right shift over upper destination register to move out bits already copied 11366 // to lower destination register. 
11367 __ subl($rtmp$$Register, 32); 11368 __ negl($rtmp$$Register); 11369 __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11370 __ bind(exit); 11371 %} 11372 ins_pipe( pipe_slow ); 11373 %} 11374 11375 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ 11376 predicate(n->bottom_type()->isa_long()); 11377 match(Set dst (ExpandBits src mask)); 11378 effect(TEMP rtmp, TEMP xtmp, KILL cr); 11379 format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %} 11380 ins_encode %{ 11381 // Extraction operation sequentially reads the bits from source register starting from LSB 11382 // and lays them out into destination register at bit locations corresponding to true bits 11383 // in mask register. Thus number of source bits read are equal to combined true bit count 11384 // of mask register pair. 11385 Label exit, mask_clipping; 11386 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 11387 __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); 11388 __ popcntl($rtmp$$Register, $mask$$Register); 11389 // If true bit count of lower mask register is 32 then none of bit of lower source register 11390 // will feed to upper destination register. 11391 __ cmpl($rtmp$$Register, 32); 11392 __ jccb(Assembler::equal, exit); 11393 // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. 11394 __ movdl($xtmp$$XMMRegister, $rtmp$$Register); 11395 // Shift right the contents of lower source register to remove already consumed bits. 11396 __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register); 11397 // Extract the bits from lower source register starting from LSB under the influence 11398 // of upper mask register. 
11399 __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register)); 11400 __ movdl($rtmp$$Register, $xtmp$$XMMRegister); 11401 __ subl($rtmp$$Register, 32); 11402 __ negl($rtmp$$Register); 11403 __ movdl($xtmp$$XMMRegister, $mask$$Register); 11404 __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register)); 11405 // Clear the set bits in upper mask register which have been used to extract the contents 11406 // from lower source register. 11407 __ bind(mask_clipping); 11408 __ blsrl($mask$$Register, $mask$$Register); 11409 __ decrementl($rtmp$$Register, 1); 11410 __ jccb(Assembler::greater, mask_clipping); 11411 // Starting from LSB extract the bits from upper source register under the influence of 11412 // remaining set bits in upper mask register. 11413 __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register); 11414 // Merge the partial results extracted from lower and upper source register bits. 11415 __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11416 __ movdl($mask$$Register, $xtmp$$XMMRegister); 11417 __ bind(exit); 11418 %} 11419 ins_pipe( pipe_slow ); 11420 %} 11421 11422 // ======================================================================= 11423 // Fast clearing of an array 11424 // Small non-constant length ClearArray for non-AVX512 targets. 
// ClearArray with a runtime (non-constant, small) element count.
// Short arrays are cleared with an inline store loop; longer ones fall
// through to MacroAssembler::clear_mem (REP STOSB / XMM / REP STOS,
// depending on UseFastStosb / UseXMMForObjInit).
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Small non-constant length ClearArray for AVX512 targets.
// Same as rep_stos but supplies a kReg opmask TEMP for the AVX512 path.
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Large non-constant length ClearArray for non-AVX512 targets.
// No inline short path; goes straight to the bulk clearing sequence
// (clear_mem is called with is_large == true).
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Small constant length ClearArray for AVX512 targets.
// cnt is a compile-time constant, so clear_mem can emit a straight-line
// masked-store sequence (ins_cost 100 makes this the preferred match).
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// String compare, Latin1 vs Latin1 (byte[] vs byte[]).
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// String compare LL, AVX512-VL/BW variant with an opmask TEMP.
instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, UTF-16 vs UTF-16 (char[] vs char[]).
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// String compare UU, AVX512-VL/BW variant.
instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, Latin1 vs UTF-16.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// String compare LU, AVX512-VL/BW variant.
instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, UTF-16 vs Latin1.  Note the operand registers are
// deliberately swapped relative to the LU case, and str2/str1 (and
// cnt2/cnt1) are passed to string_compare in reversed order.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// String compare UL, AVX512-VL/BW variant (arguments swapped as above).
instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe( pipe_slow );
%}

// fast string equals, AVX512-VL/BW variant.
instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}


// fast search of substring with known size.
// Latin1/Latin1 indexOf with a constant needle length.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// UTF-16/UTF-16 indexOf with a constant needle length (threshold 8 chars,
// i.e. the same 16 bytes as the LL case above).
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Substring search with a variable-size needle (cnt2 in a register; -1 marks
// "size not known at compile time" for the masm routine). Latin1/Latin1.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Index of a single char in a UTF-16 string.
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                             eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Index of a single char in a Latin1 string.
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// fast array equals
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    // is_array_equ=true: operands are array oops; length is loaded internally.
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 (VL+BW) variant of array_equalsB; uses an opmask temp ($ktmp).
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI
tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 (VL+BW) variant of array_equalsC; uses an opmask temp ($ktmp).
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Count the leading run of non-negative bytes in a byte array.
instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                         regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 (VL+BW) + BMI2 variant; uses two opmask temps.
instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
                              regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}


// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                         regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 (VL+BW) + BMI2 variant of string_compress; uses two opmask temps.
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                              regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 (VL+BW) + BMI2 variant of string_inflate; uses an opmask temp.
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                             regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    // last argument false: ISO-8859-1 (8-bit) limit, not ASCII (7-bit).
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ASCII
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                            regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                            eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    // last argument true: ASCII (7-bit) limit.
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST reg,reg (shorter than CMP reg,0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // InstMark bracket: pointer immediates may carry relocation info.
  ins_encode( SetInstMark, OpcSErm( op1, op2 ), Con8or32( op2 ), ClearInstMark );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF), ClearInstMark );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t!
min" %} 12370 // opcode(0x4C,0x0F); 12371 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 12372 // ins_pipe( pipe_cmov_reg ); 12373 //%} 12374 // 12375 //// Min Register with Register (P6 version) 12376 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ 12377 // predicate(VM_Version::supports_cmov() ); 12378 // match(Set op2 (MinI op1 op2)); 12379 // ins_cost(200); 12380 // expand %{ 12381 // eFlagsReg cr; 12382 // compI_eReg(cr,op1,op2); 12383 // cmovI_reg_lt(op2,op1,cr); 12384 // %} 12385 //%} 12386 12387 // Min Register with Register (generic version) 12388 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 12389 match(Set dst (MinI dst src)); 12390 effect(KILL flags); 12391 ins_cost(300); 12392 12393 format %{ "MIN $dst,$src" %} 12394 opcode(0xCC); 12395 ins_encode( min_enc(dst,src) ); 12396 ins_pipe( pipe_slow ); 12397 %} 12398 12399 // Max Register with Register 12400 // *** Min and Max using the conditional move are slower than the 12401 // *** branch version on a Pentium III. 12402 // // Conditional move for max 12403 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 12404 // effect( USE_DEF op2, USE op1, USE cr ); 12405 // format %{ "CMOVgt $op2,$op1\t! 
max" %} 12406 // opcode(0x4F,0x0F); 12407 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 12408 // ins_pipe( pipe_cmov_reg ); 12409 //%} 12410 // 12411 // // Max Register with Register (P6 version) 12412 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ 12413 // predicate(VM_Version::supports_cmov() ); 12414 // match(Set op2 (MaxI op1 op2)); 12415 // ins_cost(200); 12416 // expand %{ 12417 // eFlagsReg cr; 12418 // compI_eReg(cr,op1,op2); 12419 // cmovI_reg_gt(op2,op1,cr); 12420 // %} 12421 //%} 12422 12423 // Max Register with Register (generic version) 12424 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 12425 match(Set dst (MaxI dst src)); 12426 effect(KILL flags); 12427 ins_cost(300); 12428 12429 format %{ "MAX $dst,$src" %} 12430 opcode(0xCC); 12431 ins_encode( max_enc(dst,src) ); 12432 ins_pipe( pipe_slow ); 12433 %} 12434 12435 // ============================================================================ 12436 // Counted Loop limit node which represents exact final iterator value. 12437 // Note: the resulting value should fit into integer range since 12438 // counted loops have limit check on overflow. 12439 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{ 12440 match(Set limit (LoopLimit (Binary init limit) stride)); 12441 effect(TEMP limit_hi, TEMP tmp, KILL flags); 12442 ins_cost(300); 12443 12444 format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %} 12445 ins_encode %{ 12446 int strd = (int)$stride$$constant; 12447 assert(strd != 1 && strd != -1, "sanity"); 12448 int m1 = (strd > 0) ? 
1 : -1; 12449 // Convert limit to long (EAX:EDX) 12450 __ cdql(); 12451 // Convert init to long (init:tmp) 12452 __ movl($tmp$$Register, $init$$Register); 12453 __ sarl($tmp$$Register, 31); 12454 // $limit - $init 12455 __ subl($limit$$Register, $init$$Register); 12456 __ sbbl($limit_hi$$Register, $tmp$$Register); 12457 // + ($stride - 1) 12458 if (strd > 0) { 12459 __ addl($limit$$Register, (strd - 1)); 12460 __ adcl($limit_hi$$Register, 0); 12461 __ movl($tmp$$Register, strd); 12462 } else { 12463 __ addl($limit$$Register, (strd + 1)); 12464 __ adcl($limit_hi$$Register, -1); 12465 __ lneg($limit_hi$$Register, $limit$$Register); 12466 __ movl($tmp$$Register, -strd); 12467 } 12468 // signed division: (EAX:EDX) / pos_stride 12469 __ idivl($tmp$$Register); 12470 if (strd < 0) { 12471 // restore sign 12472 __ negl($tmp$$Register); 12473 } 12474 // (EAX) * stride 12475 __ mull($tmp$$Register); 12476 // + init (ignore upper bits) 12477 __ addl($limit$$Register, $init$$Register); 12478 %} 12479 ins_pipe( pipe_slow ); 12480 %} 12481 12482 // ============================================================================ 12483 // Branch Instructions 12484 // Jump Table 12485 instruct jumpXtnd(rRegI switch_val) %{ 12486 match(Jump switch_val); 12487 ins_cost(350); 12488 format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %} 12489 ins_encode %{ 12490 // Jump to Address(table_base + switch_reg) 12491 Address index(noreg, $switch_val$$Register, Address::times_1); 12492 __ jump(ArrayAddress($constantaddress, index), noreg); 12493 %} 12494 ins_pipe(pipe_jmp); 12495 %} 12496 12497 // Jump Direct - Label defines a relative address from JMP+1 12498 instruct jmpDir(label labl) %{ 12499 match(Goto); 12500 effect(USE labl); 12501 12502 ins_cost(300); 12503 format %{ "JMP $labl" %} 12504 size(5); 12505 ins_encode %{ 12506 Label* L = $labl$$label; 12507 __ jmp(*L, false); // Always long jump 12508 %} 12509 ins_pipe( pipe_jmp ); 12510 %} 12511 12512 // Jump Direct Conditional - Label 
// defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Unordered-compare branch: must also test the parity flag (PF set on
// unordered FP compares), hence the two-jump sequences below.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Same scan, but only the flags result is consumed (compared against null),
// so EDI need not be zeroed on a hit.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx,
                                      eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.
// Jump Direct - Label defines a relative address from JMP+1
// 2-byte JMP rel8 form; ins_short_branch(1) marks it as the short-offset
// replacement for the long jmpDir variant.
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Same encoding as jmpCon_short but matches the CountedLoopEnd back-branch.
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short conditional branch on unordered-or-CF flags (float compare results).
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short EQ/NE branch when the parity flag (unordered float result) must be
// folded in: NE branches on P or NE; EQ skips the branch when P is set.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Unordered (parity set) must not take the equal-branch.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Produces -1/0/+1 in $dst: compares high halves signed, then low halves
// unsigned, branching to set the three-way result.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // High halves decide with a signed compare ...
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // ... ties fall through to an unsigned compare of the low halves.
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Compare long vs zero for LT/GE: only the sign of the high word matters.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// CMP low halves, then SBB of the high halves (via $tmp) leaves signed
// LT/GE answerable from the flags.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only lt/ge tests are valid against the LTGE flags encoding.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// CMOVcc each 32-bit half of the long separately.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags flavor; expands to the signed-register version above.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags flavor; expands to the signed-register version above.
instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LTGE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// x87 (pre-SSE2) double flavor; expands to the fcmov encoding.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD
                         dst, regD src) %{
  // SSE2 double flavor of the long-compare CMOVE.
  predicate( UseSSE>=2 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// x87 float flavor of the long-compare CMOVE.
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// SSE float flavor of the long-compare CMOVE.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// OR of the two halves is zero iff the long is zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Compare low halves; only if they are equal do the high halves decide.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only eq/ne tests are valid against the EQNE flags encoding.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// CMOVcc each 32-bit half of the long separately.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags flavor; expands to the signed-register version above.
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_EQNE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_EQNE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_EQNE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// x87 (pre-SSE2) double flavor; expands to the fcmov encoding.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// SSE2 double flavor.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
13210 expand %{ 13211 fcmovD_regS(cmp,flags,dst,src); 13212 %} 13213 %} 13214 13215 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{ 13216 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13217 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 13218 ins_cost(200); 13219 expand %{ 13220 fcmovFPR_regS(cmp,flags,dst,src); 13221 %} 13222 %} 13223 13224 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{ 13225 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13226 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 13227 ins_cost(200); 13228 expand %{ 13229 fcmovF_regS(cmp,flags,dst,src); 13230 %} 13231 %} 13232 13233 //====== 13234 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. 13235 // Same as cmpL_reg_flags_LEGT except must negate src 13236 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{ 13237 match( Set flags (CmpL src zero )); 13238 effect( TEMP tmp ); 13239 ins_cost(300); 13240 format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t" 13241 "CMP $tmp,$src.lo\n\t" 13242 "SBB $tmp,$src.hi\n\t" %} 13243 ins_encode( long_cmp_flags3(src, tmp) ); 13244 ins_pipe( ialu_reg_reg_long ); 13245 %} 13246 13247 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. 13248 // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands 13249 // requires a commuted test to get the same result. 13250 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{ 13251 match( Set flags (CmpL src1 src2 )); 13252 effect( TEMP tmp ); 13253 ins_cost(300); 13254 format %{ "CMP $src2.lo,$src1.lo\t! 
Long compare, swapped operands, use with commuted test\n\t" 13255 "MOV $tmp,$src2.hi\n\t" 13256 "SBB $tmp,$src1.hi\t! Compute flags for long compare" %} 13257 ins_encode( long_cmp_flags2( src2, src1, tmp ) ); 13258 ins_pipe( ialu_cr_reg_reg ); 13259 %} 13260 13261 // Long compares reg < zero/req OR reg >= zero/req. 13262 // Just a wrapper for a normal branch, plus the predicate test 13263 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{ 13264 match(If cmp flags); 13265 effect(USE labl); 13266 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ); 13267 ins_cost(300); 13268 expand %{ 13269 jmpCon(cmp,flags,labl); // JGT or JLE... 13270 %} 13271 %} 13272 13273 //====== 13274 // Manifest a CmpUL result in the normal flags. Only good for LE or GT compares. 13275 // Same as cmpUL_reg_flags_LEGT except must negate src 13276 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{ 13277 match(Set flags (CmpUL src zero)); 13278 effect(TEMP tmp); 13279 ins_cost(300); 13280 format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t" 13281 "CMP $tmp,$src.lo\n\t" 13282 "SBB $tmp,$src.hi\n\t" %} 13283 ins_encode(long_cmp_flags3(src, tmp)); 13284 ins_pipe(ialu_reg_reg_long); 13285 %} 13286 13287 // Manifest a CmpUL result in the normal flags. Only good for LE or GT compares. 13288 // Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands 13289 // requires a commuted test to get the same result. 13290 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{ 13291 match(Set flags (CmpUL src1 src2)); 13292 effect(TEMP tmp); 13293 ins_cost(300); 13294 format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t" 13295 "MOV $tmp,$src2.hi\n\t" 13296 "SBB $tmp,$src1.hi\t! 
Compute flags for unsigned long compare" %} 13297 ins_encode(long_cmp_flags2( src2, src1, tmp)); 13298 ins_pipe(ialu_cr_reg_reg); 13299 %} 13300 13301 // Unsigned long compares reg < zero/req OR reg >= zero/req. 13302 // Just a wrapper for a normal branch, plus the predicate test 13303 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{ 13304 match(If cmp flags); 13305 effect(USE labl); 13306 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le); 13307 ins_cost(300); 13308 expand %{ 13309 jmpCon(cmp, flags, labl); // JGT or JLE... 13310 %} 13311 %} 13312 13313 // Compare 2 longs and CMOVE longs. 13314 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{ 13315 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 13316 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 13317 ins_cost(400); 13318 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13319 "CMOV$cmp $dst.hi,$src.hi" %} 13320 opcode(0x0F,0x40); 13321 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 13322 ins_pipe( pipe_cmov_reg_long ); 13323 %} 13324 13325 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{ 13326 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 13327 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 13328 ins_cost(500); 13329 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13330 "CMOV$cmp $dst.hi,$src.hi+4" %} 13331 opcode(0x0F,0x40); 13332 ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark ); 13333 ins_pipe( pipe_cmov_reg_long 
); 13334 %} 13335 13336 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{ 13337 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 13338 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 13339 ins_cost(400); 13340 expand %{ 13341 cmovLL_reg_LEGT(cmp, flags, dst, src); 13342 %} 13343 %} 13344 13345 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{ 13346 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 13347 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 13348 ins_cost(500); 13349 expand %{ 13350 cmovLL_mem_LEGT(cmp, flags, dst, src); 13351 %} 13352 %} 13353 13354 // Compare 2 longs and CMOVE ints. 
// Conditionally move a 32-bit int based on a long compare.  Fires only for
// the commuted LE/GT long tests and requires CMOV hardware (see predicate).
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);   // 0x0F 0x40+cc = CMOVcc
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same, but the int source is loaded from memory.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned flavor: identical machine code, delegated to the signed rule.
instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LEGT(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 register form, UseSSE<=1).
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM register form, UseSSE>=2).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 register form, UseSSE==0).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


// Compare 2 longs and CMOVE floats (XMM register form, UseSSE>=1).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // EAX is pre-loaded with a placeholder oop (-1) per the inline-cache
  // calling convention shown in the format string.
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that does not touch the FPU: no float-stack cleanup emitted.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
// Don't use ebp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall which has reset ebp to the caller state.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
  match(TailCall jump_target method_ptr);
  ins_cost(300);
  // method_ptr is pinned to EBX by its operand class (see format string).
  format %{ "JMP $jump_target \t# EBX holds method" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // Pop the return address into EDX (discarded), then indirect jump.
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Forward exception.
instruct ForwardExceptionjmp()
%{
  match(ForwardException);

  format %{ "JMP forward_exception_stub" %}
  ins_encode %{
    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast-path monitor enter for the non-lightweight locking modes.
// The box (BasicLock slot pointer) is pinned to EBX and consumed; EAX and
// scr are temps; thread is materialized with get_thread (32-bit x86 has no
// dedicated thread register).
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, $thread$$Register, nullptr);
  %}
  ins_pipe(pipe_slow);
%}

// Fast-path monitor exit for the non-lightweight locking modes; the box is
// pinned to EAX and consumed.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Fast-path monitor enter for LM_LIGHTWEIGHT; additionally needs EAX as a
// fixed temp (eax_reg).
instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Fast-path monitor exit for LM_LIGHTWEIGHT; the box input is pinned to EAX
// and consumed.
instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object eax_reg));
  effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// MaskAll from a long source into an AVX-512 opmask register, for vector
// lengths of at most 32 lanes.
instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  // Format string fixed to match the rule name (it previously printed
  // "mask_all_evexL_LE32", which made -XX:+PrintOptoAssembly output
  // inconsistent with this rule); debug-output-only change.
  format %{ "mask_all_evexL_LT32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// MaskAll from a long source for vector lengths greater than 32 lanes;
// needs an extra opmask temp.
instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// MaskAll from a 32-bit int source for vector lengths greater than 32 lanes.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Safepoint Instruction
// Poll for GC via a TEST of the thread-local polling address held in $poll.
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ set_inst_mark();
    __ relocate(relocInfo::poll_type);
    __ clear_inst_mark();
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();
    // Verify the 2-byte TEST r/m32,r32 encoding (opcode 0x85) was emitted,
    // so the size(2) promise above holds.
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// A load (rule 0) that immediately follows a store (rule 1) of the same
// register to the same memory slot is redundant: keep only the store.
// (Relies on the real loadI/storeI instructs defined earlier in this file,
// not the commented-out examples above — confirm they exist upstream.)
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.