//
// Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8,  VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9,  VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
reg_class ebpd_reg( EBP,EDI );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32  Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand

#define __ masm->

// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x)     (as_Register((x)->encoding()+2))
#define HIGH_FROM_LOW_ENC(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Nothing to pre-compute for this platform's register masks.
void reg_mask_init() {}

// Note: 'double' and 'long long' have 32-bits alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Slot [0] of the pool is sacrificed so that double_quadword's rounding-down
// to a 16-byte boundary always lands inside the array.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted before a call instruction to reset FPU/AVX state
// (optional fldcw and/or vzeroupper); callers below must account for them.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size of the stub that frees the x87 stack before a runtime call;
// filled in when that stub is emitted (see assert below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1;                      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5;                      // skip MOV instruction
  current_offset += 1;                      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM() -- pack a ModR/M (or SIB) byte: mod/reg/rm in bits 7-6/5-3/2-0.
void emit_rm(C2_MacroAssembler *masm, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  __ emit_int8(c);
}

// EMIT_CC() -- opcode byte with the condition code OR'd into the low bits.
void emit_cc(C2_MacroAssembler *masm, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  __ emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(C2_MacroAssembler *masm, int code) {
  __ emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(C2_MacroAssembler *masm, int code, relocInfo::relocType reloc, int offset = 0) {
  __ relocate(__ inst_mark() + offset, reloc);
  emit_opcode(masm, code);
}

// EMIT_D8()
void emit_d8(C2_MacroAssembler *masm, int d8) {
  __ emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(C2_MacroAssembler *masm, int d16) {
  __ emit_int16(d16);
}

// EMIT_D32()
void emit_d32(C2_MacroAssembler *masm, int d32) {
  __ emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(C2_MacroAssembler *masm, int d32, relocInfo::relocType reloc,
        int format) {
  __ relocate(__ inst_mark(), reloc, format);
  __ emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(C2_MacroAssembler *masm, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  __ relocate(__ inst_mark(), rspec, format);
  __ emit_int32(d32);
}

// Access stack slot for load or store.
// Emits opcode + ModRM/SIB addressing [ESP+disp], choosing the 8-bit
// displacement form when disp fits in a signed byte.
void store_to_stackslot(C2_MacroAssembler *masm, int opcode, int rm_field, int disp) {
  emit_opcode( masm, opcode );                 // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( masm, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (masm, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( masm, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(masm, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Encode the ModRM (+ optional SIB + displacement) bytes for a reg,[mem]
// operand pair. 'index == 0x4' means "no index register" per the x86
// encoding convention; 'base == -1' requests an absolute 32-bit address.
void encode_RegMem( C2_MacroAssembler *masm, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(masm, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(masm, 0x1, reg_encoding, base);
        emit_d8(masm, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) {     // Special flag for absolute address
          emit_rm(masm, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(masm, displace, disp_reloc, 1);
          } else {
            emit_d32      (masm, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(masm, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(masm, displace, disp_reloc, 1);
          } else {
            emit_d32      (masm, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(masm, 0x0, reg_encoding, 0x4);
      emit_rm(masm, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(masm, 0x1, reg_encoding, 0x4);
        emit_rm(masm, scale, index, base);
        emit_d8(masm, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(masm, 0x2, reg_encoding, 0x4);
          emit_rm(masm, scale, index, 0x04);
        } else {
          emit_rm(masm, 0x2, reg_encoding, 0x4);
          emit_rm(masm, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(masm, displace, disp_reloc, 1);
        } else {
          emit_d32      (masm, displace);
        }
      }
    }
  }
}


// Emit a reg-reg MOV (opcode 0x8B); a self-move is elided entirely.
void encode_Copy( C2_MacroAssembler *masm, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, dst_encoding, src_encoding );
  }
}

void emit_cmpfp_fixup(MacroAssembler* masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way compare result in 'dst':
// -1 for less-than or unordered (parity), 0 for equal, 1 for greater.
static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0; // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  __ verified_entry(C);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL rsp, poll_offset[thread] \n\t"
              "JA #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(masm, 0x81); // add SP, #framesize (imm32 form)
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d32(masm, framesize);
  } else if (framesize) {
    emit_opcode(masm, 0x83); // add SP, #framesize (sign-extended imm8 form)
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, framesize);
  }

  emit_opcode(masm, 0x58 | EBP_enc); // POP EBP (0x58 | reg)

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ set_inst_mark();
    __ relocate(relocInfo::poll_return_type);
    __ clear_inst_mark();
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//============================================================================= 730 731 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack }; 732 static enum RC rc_class( OptoReg::Name reg ) { 733 734 if( !OptoReg::is_valid(reg) ) return rc_bad; 735 if (OptoReg::is_stack(reg)) return rc_stack; 736 737 VMReg r = OptoReg::as_VMReg(reg); 738 if (r->is_Register()) return rc_int; 739 if (r->is_FloatRegister()) { 740 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 741 return rc_float; 742 } 743 if (r->is_KRegister()) return rc_kreg; 744 assert(r->is_XMMRegister(), "must be"); 745 return rc_xmm; 746 } 747 748 static int impl_helper( C2_MacroAssembler *masm, bool do_size, bool is_load, int offset, int reg, 749 int opcode, const char *op_str, int size, outputStream* st ) { 750 if( masm ) { 751 masm->set_inst_mark(); 752 emit_opcode (masm, opcode ); 753 encode_RegMem(masm, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); 754 masm->clear_inst_mark(); 755 #ifndef PRODUCT 756 } else if( !do_size ) { 757 if( size != 0 ) st->print("\n\t"); 758 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 759 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 760 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 761 } else { // FLD, FST, PUSH, POP 762 st->print("%s [ESP + #%d]",op_str,offset); 763 } 764 #endif 765 } 766 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 767 return size+3+offset_size; 768 } 769 770 // Helper for XMM registers. Extra opcode bits, limited syntax. 
// Spill/reload an XMM register to/from [ESP + offset].  A register pair
// (reg_lo+1 == reg_hi) means a 64-bit double move; otherwise 32-bit float.
// Same three-mode contract as impl_helper(); returns the updated byte count.
static int impl_x_helper( C2_MacroAssembler *masm, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (masm) {
    // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    __ set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                              Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                              offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                              Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                              offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Size accounting: with AVX-512, compressed disp8*N may shrink a large
  // offset to a single displacement byte.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy (float or double, by register-pair convention).
static int impl_movx_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (masm) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// 32-bit GPR -> XMM copy via MOVD.
static int impl_movgpr2x_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (masm) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// XMM -> 32-bit GPR copy via MOVD.
static int impl_movx2gpr_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (masm) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer register-to-register copy: MOV r32, r/m32 (opcode 0x8B), 2 bytes.
static int impl_mov_helper( C2_MacroAssembler *masm, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( masm ) {
    emit_opcode(masm, 0x8B );
    emit_rm    (masm, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 float register to [ESP + offset].  A value not already in
// FPR1 (top of stack) is first FLD'ed and then stored with pop (FSTP);
// FPR1 itself is stored without popping (FST).
static int impl_fp_store_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( masm ) {
      emit_opcode( masm, 0xD9 );  // FLD (i.e., push it)
      emit_d8( masm, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // st_op selects the /reg field of the store: FSTP (store & pop) vs FST.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(masm,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
944 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, 945 int src_hi, int dst_hi, uint ireg, outputStream* st); 946 947 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, 948 int stack_offset, int reg, uint ireg, outputStream* st); 949 950 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset, 951 int dst_offset, uint ireg, outputStream* st) { 952 if (masm) { 953 switch (ireg) { 954 case Op_VecS: 955 __ pushl(Address(rsp, src_offset)); 956 __ popl (Address(rsp, dst_offset)); 957 break; 958 case Op_VecD: 959 __ pushl(Address(rsp, src_offset)); 960 __ popl (Address(rsp, dst_offset)); 961 __ pushl(Address(rsp, src_offset+4)); 962 __ popl (Address(rsp, dst_offset+4)); 963 break; 964 case Op_VecX: 965 __ movdqu(Address(rsp, -16), xmm0); 966 __ movdqu(xmm0, Address(rsp, src_offset)); 967 __ movdqu(Address(rsp, dst_offset), xmm0); 968 __ movdqu(xmm0, Address(rsp, -16)); 969 break; 970 case Op_VecY: 971 __ vmovdqu(Address(rsp, -32), xmm0); 972 __ vmovdqu(xmm0, Address(rsp, src_offset)); 973 __ vmovdqu(Address(rsp, dst_offset), xmm0); 974 __ vmovdqu(xmm0, Address(rsp, -32)); 975 break; 976 case Op_VecZ: 977 __ evmovdquq(Address(rsp, -64), xmm0, 2); 978 __ evmovdquq(xmm0, Address(rsp, src_offset), 2); 979 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); 980 __ evmovdquq(xmm0, Address(rsp, -64), 2); 981 break; 982 default: 983 ShouldNotReachHere(); 984 } 985 #ifndef PRODUCT 986 } else { 987 switch (ireg) { 988 case Op_VecS: 989 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 990 "popl [rsp + #%d]", 991 src_offset, dst_offset); 992 break; 993 case Op_VecD: 994 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 995 "popq [rsp + #%d]\n\t" 996 "pushl [rsp + #%d]\n\t" 997 "popq [rsp + #%d]", 998 src_offset, dst_offset, src_offset+4, dst_offset+4); 999 break; 1000 case Op_VecX: 1001 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1002 "movdqu xmm0, [rsp + #%d]\n\t" 1003 "movdqu [rsp + 
#%d], xmm0\n\t" 1004 "movdqu xmm0, [rsp - #16]", 1005 src_offset, dst_offset); 1006 break; 1007 case Op_VecY: 1008 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1009 "vmovdqu xmm0, [rsp + #%d]\n\t" 1010 "vmovdqu [rsp + #%d], xmm0\n\t" 1011 "vmovdqu xmm0, [rsp - #32]", 1012 src_offset, dst_offset); 1013 break; 1014 case Op_VecZ: 1015 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1016 "vmovdqu xmm0, [rsp + #%d]\n\t" 1017 "vmovdqu [rsp + #%d], xmm0\n\t" 1018 "vmovdqu xmm0, [rsp - #64]", 1019 src_offset, dst_offset); 1020 break; 1021 default: 1022 ShouldNotReachHere(); 1023 } 1024 #endif 1025 } 1026 } 1027 1028 uint MachSpillCopyNode::implementation( C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1029 // Get registers to move 1030 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1031 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1032 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1033 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1034 1035 enum RC src_second_rc = rc_class(src_second); 1036 enum RC src_first_rc = rc_class(src_first); 1037 enum RC dst_second_rc = rc_class(dst_second); 1038 enum RC dst_first_rc = rc_class(dst_first); 1039 1040 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1041 1042 // Generate spill code! 
1043 int size = 0; 1044 1045 if( src_first == dst_first && src_second == dst_second ) 1046 return size; // Self copy, no move 1047 1048 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) { 1049 uint ireg = ideal_reg(); 1050 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1051 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1052 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1053 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1054 // mem -> mem 1055 int src_offset = ra_->reg2offset(src_first); 1056 int dst_offset = ra_->reg2offset(dst_first); 1057 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st); 1058 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1059 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st); 1060 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1061 int stack_offset = ra_->reg2offset(dst_first); 1062 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st); 1063 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1064 int stack_offset = ra_->reg2offset(src_first); 1065 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st); 1066 } else { 1067 ShouldNotReachHere(); 1068 } 1069 return 0; 1070 } 1071 1072 // -------------------------------------- 1073 // Check for mem-mem move. push/pop to move. 
1074 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1075 if( src_second == dst_first ) { // overlapping stack copy ranges 1076 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1077 size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1078 size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1079 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1080 } 1081 // move low bits 1082 size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1083 size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1084 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1085 size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1086 size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1087 } 1088 return size; 1089 } 1090 1091 // -------------------------------------- 1092 // Check for integer reg-reg copy 1093 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1094 size = impl_mov_helper(masm,do_size,src_first,dst_first,size, st); 1095 1096 // Check for integer store 1097 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1098 size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1099 1100 // Check for integer load 1101 if( src_first_rc == rc_stack && dst_first_rc == rc_int ) 1102 size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1103 1104 // Check for integer reg-xmm reg copy 1105 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1106 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1107 "no 64 bit integer-float reg moves" ); 1108 return 
impl_movgpr2x_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st); 1109 } 1110 // -------------------------------------- 1111 // Check for float reg-reg copy 1112 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1113 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1114 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1115 if( masm ) { 1116 1117 // Note the mucking with the register encode to compensate for the 0/1 1118 // indexing issue mentioned in a comment in the reg_def sections 1119 // for FPR registers many lines above here. 1120 1121 if( src_first != FPR1L_num ) { 1122 emit_opcode (masm, 0xD9 ); // FLD ST(i) 1123 emit_d8 (masm, 0xC0+Matcher::_regEncode[src_first]-1 ); 1124 emit_opcode (masm, 0xDD ); // FSTP ST(i) 1125 emit_d8 (masm, 0xD8+Matcher::_regEncode[dst_first] ); 1126 } else { 1127 emit_opcode (masm, 0xDD ); // FST ST(i) 1128 emit_d8 (masm, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1129 } 1130 #ifndef PRODUCT 1131 } else if( !do_size ) { 1132 if( size != 0 ) st->print("\n\t"); 1133 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1134 else st->print( "FST %s", Matcher::regName[dst_first]); 1135 #endif 1136 } 1137 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1138 } 1139 1140 // Check for float store 1141 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1142 return impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1143 } 1144 1145 // Check for float load 1146 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1147 int offset = ra_->reg2offset(src_first); 1148 const char *op_str; 1149 int op; 1150 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1151 op_str = "FLD_D"; 1152 op = 0xDD; 1153 } else { // 32-bit load 1154 op_str = "FLD_S"; 1155 op = 0xD9; 1156 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1157 } 1158 if( masm ) { 1159 masm->set_inst_mark(); 1160 emit_opcode (masm, op ); 1161 encode_RegMem(masm, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1162 emit_opcode (masm, 0xDD ); // FSTP ST(i) 1163 emit_d8 (masm, 0xD8+Matcher::_regEncode[dst_first] ); 1164 masm->clear_inst_mark(); 1165 #ifndef PRODUCT 1166 } else if( !do_size ) { 1167 if( size != 0 ) st->print("\n\t"); 1168 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1169 #endif 1170 } 1171 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1172 return size + 3+offset_size+2; 1173 } 1174 1175 // Check for xmm reg-reg copy 1176 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1177 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1178 (src_first+1 == src_second && dst_first+1 == dst_second), 1179 "no non-adjacent float-moves" ); 1180 return impl_movx_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st); 1181 } 1182 1183 // Check for xmm reg-integer reg copy 1184 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1185 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1186 "no 64 bit float-integer reg moves" ); 1187 return impl_movx2gpr_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st); 1188 } 1189 1190 // Check for xmm store 1191 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1192 return impl_x_helper(masm,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st); 1193 } 1194 1195 // Check for float xmm load 1196 if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1197 return impl_x_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1198 } 1199 1200 // Copy from float reg to xmm reg 1201 if( 
src_first_rc == rc_float && dst_first_rc == rc_xmm ) { 1202 // copy to the top of stack from floating point reg 1203 // and use LEA to preserve flags 1204 if( masm ) { 1205 emit_opcode(masm,0x8D); // LEA ESP,[ESP-8] 1206 emit_rm(masm, 0x1, ESP_enc, 0x04); 1207 emit_rm(masm, 0x0, 0x04, ESP_enc); 1208 emit_d8(masm,0xF8); 1209 #ifndef PRODUCT 1210 } else if( !do_size ) { 1211 if( size != 0 ) st->print("\n\t"); 1212 st->print("LEA ESP,[ESP-8]"); 1213 #endif 1214 } 1215 size += 4; 1216 1217 size = impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1218 1219 // Copy from the temp memory to the xmm reg. 1220 size = impl_x_helper(masm,do_size,true ,0,dst_first, dst_second, size, st); 1221 1222 if( masm ) { 1223 emit_opcode(masm,0x8D); // LEA ESP,[ESP+8] 1224 emit_rm(masm, 0x1, ESP_enc, 0x04); 1225 emit_rm(masm, 0x0, 0x04, ESP_enc); 1226 emit_d8(masm,0x08); 1227 #ifndef PRODUCT 1228 } else if( !do_size ) { 1229 if( size != 0 ) st->print("\n\t"); 1230 st->print("LEA ESP,[ESP+8]"); 1231 #endif 1232 } 1233 size += 4; 1234 return size; 1235 } 1236 1237 // AVX-512 opmask specific spilling. 
1238 if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) { 1239 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1240 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1241 int offset = ra_->reg2offset(src_first); 1242 if (masm != nullptr) { 1243 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); 1244 #ifndef PRODUCT 1245 } else { 1246 st->print("KMOV %s, [ESP + %d]", Matcher::regName[dst_first], offset); 1247 #endif 1248 } 1249 return 0; 1250 } 1251 1252 if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) { 1253 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1254 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1255 int offset = ra_->reg2offset(dst_first); 1256 if (masm != nullptr) { 1257 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first])); 1258 #ifndef PRODUCT 1259 } else { 1260 st->print("KMOV [ESP + %d], %s", offset, Matcher::regName[src_first]); 1261 #endif 1262 } 1263 return 0; 1264 } 1265 1266 if (src_first_rc == rc_kreg && dst_first_rc == rc_int) { 1267 Unimplemented(); 1268 return 0; 1269 } 1270 1271 if (src_first_rc == rc_int && dst_first_rc == rc_kreg) { 1272 Unimplemented(); 1273 return 0; 1274 } 1275 1276 if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) { 1277 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1278 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1279 if (masm != nullptr) { 1280 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first])); 1281 #ifndef PRODUCT 1282 } else { 1283 st->print("KMOV %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]); 1284 #endif 1285 } 1286 return 0; 1287 } 1288 1289 assert( size > 0, "missed a case" ); 1290 1291 // 
-------------------------------------------------------------------- 1292 // Check for second bits still needing moving. 1293 if( src_second == dst_second ) 1294 return size; // Self copy; no move 1295 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1296 1297 // Check for second word int-int move 1298 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1299 return impl_mov_helper(masm,do_size,src_second,dst_second,size, st); 1300 1301 // Check for second word integer store 1302 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1303 return impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1304 1305 // Check for second word integer load 1306 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1307 return impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1308 1309 Unimplemented(); 1310 return 0; // Mute compiler 1311 } 1312 1313 #ifndef PRODUCT 1314 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1315 implementation( nullptr, ra_, false, st ); 1316 } 1317 #endif 1318 1319 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { 1320 implementation( masm, ra_, false, nullptr ); 1321 } 1322 1323 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1324 return MachNode::size(ra_); 1325 } 1326 1327 1328 //============================================================================= 1329 #ifndef PRODUCT 1330 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1331 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1332 int reg = ra_->get_reg_first(this); 1333 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1334 } 1335 #endif 1336 1337 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { 1338 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1339 int reg = 
ra_->get_encode(this); 1340 if( offset >= 128 ) { 1341 emit_opcode(masm, 0x8D); // LEA reg,[SP+offset] 1342 emit_rm(masm, 0x2, reg, 0x04); 1343 emit_rm(masm, 0x0, 0x04, ESP_enc); 1344 emit_d32(masm, offset); 1345 } 1346 else { 1347 emit_opcode(masm, 0x8D); // LEA reg,[SP+offset] 1348 emit_rm(masm, 0x1, reg, 0x04); 1349 emit_rm(masm, 0x0, 0x04, ESP_enc); 1350 emit_d8(masm, offset); 1351 } 1352 } 1353 1354 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1355 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1356 if( offset >= 128 ) { 1357 return 7; 1358 } 1359 else { 1360 return 4; 1361 } 1362 } 1363 1364 //============================================================================= 1365 #ifndef PRODUCT 1366 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1367 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1368 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1369 st->print_cr("\tNOP"); 1370 st->print_cr("\tNOP"); 1371 if( !OptoBreakpoint ) 1372 st->print_cr("\tNOP"); 1373 } 1374 #endif 1375 1376 void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { 1377 __ ic_check(CodeEntryAlignment); 1378 } 1379 1380 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1381 return MachNode::size(ra_); // too many variables; just compute it 1382 // the hard way 1383 } 1384 1385 1386 //============================================================================= 1387 1388 // Vector calling convention not supported. 1389 bool Matcher::supports_vector_calling_convention() { 1390 return false; 1391 } 1392 1393 OptoRegPair Matcher::vector_return_value(uint ideal_reg) { 1394 Unimplemented(); 1395 return OptoRegPair(0, 0); 1396 } 1397 1398 // Is this branch offset short enough that a short branch can be used? 1399 // 1400 // NOTE: If the platform does not provide any short branch variants, then 1401 // this method should return false for offset 0. 
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  // Standard rel8 branch reach.
  return (-128 <= offset && offset <= 127);
}

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

// Register-pressure limits used by the allocator; overridable on the
// command line via INTPRESSURE / FLOATPRESSURE (-1 means "use default 6").
uint Matcher::int_pressure_limit()
{
  return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
}

uint Matcher::float_pressure_limit()
{
  return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
         (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    // AND with a constant whose high word is zero clears the high word.
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.  MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Set instruction mark in MacroAssembler. This is used only in
  // instructions that emit bytes directly to the CodeBuffer wrapped
  // in the MacroAssembler. Should go away once all "instruct" are
  // patched to emit bytes only using methods in MacroAssembler.
  enc_class SetInstMark %{
    __ set_inst_mark();
  %}

  enc_class ClearInstMark %{
    __ clear_inst_mark();
  %}

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(masm, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(masm, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(masm, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix.
  enc_class SizePrefix %{
    emit_opcode(masm,0x66);
  %}

  // Emit a ModRM byte for a register-to-register form (mod == 0x3).
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  // Emit an explicit opcode followed by a reg-reg ModRM byte.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(masm,$opcode$$constant);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32, 0 using the one-byte 0xB8+rd form with a zero immediate.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( masm, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( masm, 0x0 ); // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    // normal case special case
    //
    // input : rax,: dividend min_int
    // reg: divisor -1
    //
    // output: rax,: quotient (= rax, idiv reg) min_int
    // rdx: remainder (= rax, irem reg) 0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80 cmp rax,80000000h
    // 0F 85 0B 00 00 00 jne normal_case
    // 33 D2 xor rdx,edx
    // 83 F9 FF cmp rcx,0FFh
    // 0F 84 03 00 00 00 je done
    // normal_case:
    // 99 cdq
    // F7 F9 idiv rax,ecx
    // done:
    //
    emit_opcode(masm,0x81); emit_d8(masm,0xF8);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x80);                     // cmp rax,80000000h
    emit_opcode(masm,0x0F); emit_d8(masm,0x85);
    emit_opcode(masm,0x0B); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);                     // jne normal_case
    emit_opcode(masm,0x33); emit_d8(masm,0xD2);                     // xor rdx,edx
    emit_opcode(masm,0x83); emit_d8(masm,0xF9); emit_d8(masm,0xFF); // cmp rcx,0FFh
    emit_opcode(masm,0x0F); emit_d8(masm,0x84);
    emit_opcode(masm,0x03); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);                     // je done
    // normal_case:
    emit_opcode(masm,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(masm, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(masm, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(masm, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1627 // Check for 8-bit immediate, and set sign extend bit in opcode 1628 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1629 emit_opcode(masm, $primary | 0x02); } 1630 else { // If 32-bit immediate 1631 emit_opcode(masm, $primary); 1632 } 1633 // Emit r/m byte with secondary opcode, after primary opcode. 1634 emit_rm(masm, 0x3, $secondary, $dst$$reg); 1635 %} 1636 1637 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1638 // Check for 8-bit immediate, and set sign extend bit in opcode 1639 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1640 $$$emit8$imm$$constant; 1641 } 1642 else { // If 32-bit immediate 1643 // Output immediate 1644 $$$emit32$imm$$constant; 1645 } 1646 %} 1647 1648 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1649 // Emit primary opcode and set sign-extend bit 1650 // Check for 8-bit immediate, and set sign extend bit in opcode 1651 int con = (int)$imm$$constant; // Throw away top bits 1652 emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1653 // Emit r/m byte with secondary opcode, after primary opcode. 1654 emit_rm(masm, 0x3, $secondary, $dst$$reg); 1655 if ((con >= -128) && (con <= 127)) emit_d8 (masm,con); 1656 else emit_d32(masm,con); 1657 %} 1658 1659 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1660 // Emit primary opcode and set sign-extend bit 1661 // Check for 8-bit immediate, and set sign extend bit in opcode 1662 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1663 emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1664 // Emit r/m byte with tertiary opcode, after primary opcode. 
1665 emit_rm(masm, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg)); 1666 if ((con >= -128) && (con <= 127)) emit_d8 (masm,con); 1667 else emit_d32(masm,con); 1668 %} 1669 1670 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1671 emit_cc(masm, $secondary, $dst$$reg ); 1672 %} 1673 1674 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1675 int destlo = $dst$$reg; 1676 int desthi = HIGH_FROM_LOW_ENC(destlo); 1677 // bswap lo 1678 emit_opcode(masm, 0x0F); 1679 emit_cc(masm, 0xC8, destlo); 1680 // bswap hi 1681 emit_opcode(masm, 0x0F); 1682 emit_cc(masm, 0xC8, desthi); 1683 // xchg lo and hi 1684 emit_opcode(masm, 0x87); 1685 emit_rm(masm, 0x3, destlo, desthi); 1686 %} 1687 1688 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1689 emit_rm(masm, 0x3, $secondary, $div$$reg ); 1690 %} 1691 1692 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1693 $$$emit8$primary; 1694 emit_cc(masm, $secondary, $cop$$cmpcode); 1695 %} 1696 1697 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1698 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1699 emit_d8(masm, op >> 8 ); 1700 emit_d8(masm, op & 255); 1701 %} 1702 1703 // emulate a CMOV with a conditional branch around a MOV 1704 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1705 // Invert sense of branch from sense of CMOV 1706 emit_cc( masm, 0x70, ($cop$$cmpcode^1) ); 1707 emit_d8( masm, $brOffs$$constant ); 1708 %} 1709 1710 enc_class enc_PartialSubtypeCheck( ) %{ 1711 Register Redi = as_Register(EDI_enc); // result register 1712 Register Reax = as_Register(EAX_enc); // super class 1713 Register Recx = as_Register(ECX_enc); // killed 1714 Register Resi = as_Register(ESI_enc); // sub class 1715 Label miss; 1716 1717 // NB: Callers may assume that, when $result is a valid register, 1718 // check_klass_subtype_slow_path sets it to a nonzero value. 
1719 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1720 nullptr, &miss, 1721 /*set_cond_codes:*/ true); 1722 if ($primary) { 1723 __ xorptr(Redi, Redi); 1724 } 1725 __ bind(miss); 1726 %} 1727 1728 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1729 int start = __ offset(); 1730 if (UseSSE >= 2) { 1731 if (VerifyFPU) { 1732 __ verify_FPU(0, "must be empty in SSE2+ mode"); 1733 } 1734 } else { 1735 // External c_calling_convention expects the FPU stack to be 'clean'. 1736 // Compiled code leaves it dirty. Do cleanup now. 1737 __ empty_FPU_stack(); 1738 } 1739 if (sizeof_FFree_Float_Stack_All == -1) { 1740 sizeof_FFree_Float_Stack_All = __ offset() - start; 1741 } else { 1742 assert(__ offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1743 } 1744 %} 1745 1746 enc_class Verify_FPU_For_Leaf %{ 1747 if( VerifyFPU ) { 1748 __ verify_FPU( -3, "Returning from Runtime Leaf call"); 1749 } 1750 %} 1751 1752 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1753 // This is the instruction starting address for relocation info. 1754 __ set_inst_mark(); 1755 $$$emit8$primary; 1756 // CALL directly to the runtime 1757 emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4), 1758 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1759 __ clear_inst_mark(); 1760 __ post_call_nop(); 1761 1762 if (UseSSE >= 2) { 1763 BasicType rt = tf()->return_type(); 1764 1765 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1766 // A C runtime call where the return value is unused. In SSE2+ 1767 // mode the result needs to be removed from the FPU stack. It's 1768 // likely that this function call could be removed by the 1769 // optimizer if the C function is a pure function. 
1770 __ ffree(0); 1771 } else if (rt == T_FLOAT) { 1772 __ lea(rsp, Address(rsp, -4)); 1773 __ fstp_s(Address(rsp, 0)); 1774 __ movflt(xmm0, Address(rsp, 0)); 1775 __ lea(rsp, Address(rsp, 4)); 1776 } else if (rt == T_DOUBLE) { 1777 __ lea(rsp, Address(rsp, -8)); 1778 __ fstp_d(Address(rsp, 0)); 1779 __ movdbl(xmm0, Address(rsp, 0)); 1780 __ lea(rsp, Address(rsp, 8)); 1781 } 1782 } 1783 %} 1784 1785 enc_class pre_call_resets %{ 1786 // If method sets FPU control word restore it here 1787 debug_only(int off0 = __ offset()); 1788 if (ra_->C->in_24_bit_fp_mode()) { 1789 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 1790 } 1791 // Clear upper bits of YMM registers when current compiled code uses 1792 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1793 __ vzeroupper(); 1794 debug_only(int off1 = __ offset()); 1795 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1796 %} 1797 1798 enc_class post_call_FPU %{ 1799 // If method sets FPU control word do it here also 1800 if (Compile::current()->in_24_bit_fp_mode()) { 1801 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 1802 } 1803 %} 1804 1805 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1806 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1807 // who we intended to call. 1808 __ set_inst_mark(); 1809 $$$emit8$primary; 1810 1811 if (!_method) { 1812 emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4), 1813 runtime_call_Relocation::spec(), 1814 RELOC_IMM32); 1815 __ clear_inst_mark(); 1816 __ post_call_nop(); 1817 } else { 1818 int method_index = resolved_method_index(masm); 1819 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1820 : static_call_Relocation::spec(method_index); 1821 emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4), 1822 rspec, RELOC_DISP32); 1823 __ post_call_nop(); 1824 address mark = __ inst_mark(); 1825 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) { 1826 // Calls of the same statically bound method can share 1827 // a stub to the interpreter. 1828 __ code()->shared_stub_to_interp_for(_method, __ code()->insts()->mark_off()); 1829 __ clear_inst_mark(); 1830 } else { 1831 // Emit stubs for static call. 1832 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark); 1833 __ clear_inst_mark(); 1834 if (stub == nullptr) { 1835 ciEnv::current()->record_failure("CodeCache is full"); 1836 return; 1837 } 1838 } 1839 } 1840 %} 1841 1842 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1843 __ ic_call((address)$meth$$method, resolved_method_index(masm)); 1844 __ post_call_nop(); 1845 %} 1846 1847 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1848 int disp = in_bytes(Method::from_compiled_offset()); 1849 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1850 1851 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1852 __ set_inst_mark(); 1853 $$$emit8$primary; 1854 emit_rm(masm, 0x01, $secondary, EAX_enc ); // R/M byte 1855 emit_d8(masm, disp); // Displacement 1856 __ clear_inst_mark(); 1857 __ post_call_nop(); 1858 %} 1859 1860 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1861 $$$emit8$primary; 1862 emit_rm(masm, 0x3, $secondary, $dst$$reg); 1863 $$$emit8$shift$$constant; 1864 %} 1865 1866 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1867 // Load immediate does not have a zero or sign extended version 1868 // for 8-bit immediates 1869 emit_opcode(masm, 0xB8 + $dst$$reg); 1870 $$$emit32$src$$constant; 1871 %} 1872 1873 enc_class LdImmP (rRegI dst, immI src) %{ // 
Load Immediate 1874 // Load immediate does not have a zero or sign extended version 1875 // for 8-bit immediates 1876 emit_opcode(masm, $primary + $dst$$reg); 1877 $$$emit32$src$$constant; 1878 %} 1879 1880 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1881 // Load immediate does not have a zero or sign extended version 1882 // for 8-bit immediates 1883 int dst_enc = $dst$$reg; 1884 int src_con = $src$$constant & 0x0FFFFFFFFL; 1885 if (src_con == 0) { 1886 // xor dst, dst 1887 emit_opcode(masm, 0x33); 1888 emit_rm(masm, 0x3, dst_enc, dst_enc); 1889 } else { 1890 emit_opcode(masm, $primary + dst_enc); 1891 emit_d32(masm, src_con); 1892 } 1893 %} 1894 1895 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 1896 // Load immediate does not have a zero or sign extended version 1897 // for 8-bit immediates 1898 int dst_enc = $dst$$reg + 2; 1899 int src_con = ((julong)($src$$constant)) >> 32; 1900 if (src_con == 0) { 1901 // xor dst, dst 1902 emit_opcode(masm, 0x33); 1903 emit_rm(masm, 0x3, dst_enc, dst_enc); 1904 } else { 1905 emit_opcode(masm, $primary + dst_enc); 1906 emit_d32(masm, src_con); 1907 } 1908 %} 1909 1910 1911 // Encode a reg-reg copy. If it is useless, then empty encoding. 
1912 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 1913 encode_Copy( masm, $dst$$reg, $src$$reg ); 1914 %} 1915 1916 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 1917 encode_Copy( masm, $dst$$reg, $src$$reg ); 1918 %} 1919 1920 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 1921 emit_rm(masm, 0x3, $dst$$reg, $src$$reg); 1922 %} 1923 1924 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 1925 $$$emit8$primary; 1926 emit_rm(masm, 0x3, $dst$$reg, $src$$reg); 1927 %} 1928 1929 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 1930 $$$emit8$secondary; 1931 emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg)); 1932 %} 1933 1934 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 1935 emit_rm(masm, 0x3, $dst$$reg, $src$$reg); 1936 %} 1937 1938 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 1939 emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg)); 1940 %} 1941 1942 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 1943 emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg)); 1944 %} 1945 1946 enc_class Con32 (immI src) %{ // Con32(storeImmI) 1947 // Output immediate 1948 $$$emit32$src$$constant; 1949 %} 1950 1951 enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm 1952 // Output Float immediate bits 1953 jfloat jf = $src$$constant; 1954 int jf_as_bits = jint_cast( jf ); 1955 emit_d32(masm, jf_as_bits); 1956 %} 1957 1958 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 1959 // Output Float immediate bits 1960 jfloat jf = $src$$constant; 1961 int jf_as_bits = jint_cast( jf ); 1962 emit_d32(masm, jf_as_bits); 1963 %} 1964 1965 enc_class Con16 (immI src) %{ // Con16(storeImmI) 1966 // Output immediate 1967 $$$emit16$src$$constant; 1968 %} 1969 1970 enc_class Con_d32(immI src) %{ 1971 emit_d32(masm,$src$$constant); 1972 %} 1973 1974 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 1975 // Output immediate memory reference 1976 emit_rm(masm, 0x00, 
$t1$$reg, 0x05 ); 1977 emit_d32(masm, 0x00); 1978 %} 1979 1980 enc_class lock_prefix( ) %{ 1981 emit_opcode(masm,0xF0); // [Lock] 1982 %} 1983 1984 // Cmp-xchg long value. 1985 // Note: we need to swap rbx, and rcx before and after the 1986 // cmpxchg8 instruction because the instruction uses 1987 // rcx as the high order word of the new value to store but 1988 // our register encoding uses rbx,. 1989 enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{ 1990 1991 // XCHG rbx,ecx 1992 emit_opcode(masm,0x87); 1993 emit_opcode(masm,0xD9); 1994 // [Lock] 1995 emit_opcode(masm,0xF0); 1996 // CMPXCHG8 [Eptr] 1997 emit_opcode(masm,0x0F); 1998 emit_opcode(masm,0xC7); 1999 emit_rm( masm, 0x0, 1, $mem_ptr$$reg ); 2000 // XCHG rbx,ecx 2001 emit_opcode(masm,0x87); 2002 emit_opcode(masm,0xD9); 2003 %} 2004 2005 enc_class enc_cmpxchg(eSIRegP mem_ptr) %{ 2006 // [Lock] 2007 emit_opcode(masm,0xF0); 2008 2009 // CMPXCHG [Eptr] 2010 emit_opcode(masm,0x0F); 2011 emit_opcode(masm,0xB1); 2012 emit_rm( masm, 0x0, 1, $mem_ptr$$reg ); 2013 %} 2014 2015 enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{ 2016 // [Lock] 2017 emit_opcode(masm,0xF0); 2018 2019 // CMPXCHGB [Eptr] 2020 emit_opcode(masm,0x0F); 2021 emit_opcode(masm,0xB0); 2022 emit_rm( masm, 0x0, 1, $mem_ptr$$reg ); 2023 %} 2024 2025 enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{ 2026 // [Lock] 2027 emit_opcode(masm,0xF0); 2028 2029 // 16-bit mode 2030 emit_opcode(masm, 0x66); 2031 2032 // CMPXCHGW [Eptr] 2033 emit_opcode(masm,0x0F); 2034 emit_opcode(masm,0xB1); 2035 emit_rm( masm, 0x0, 1, $mem_ptr$$reg ); 2036 %} 2037 2038 enc_class enc_flags_ne_to_boolean( iRegI res ) %{ 2039 int res_encoding = $res$$reg; 2040 2041 // MOV res,0 2042 emit_opcode( masm, 0xB8 + res_encoding); 2043 emit_d32( masm, 0 ); 2044 // JNE,s fail 2045 emit_opcode(masm,0x75); 2046 emit_d8(masm, 5 ); 2047 // MOV res,1 2048 emit_opcode( masm, 0xB8 + res_encoding); 2049 emit_d32( masm, 1 ); 2050 // fail: 2051 %} 2052 2053 enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem 
2054 int reg_encoding = $ereg$$reg; 2055 int base = $mem$$base; 2056 int index = $mem$$index; 2057 int scale = $mem$$scale; 2058 int displace = $mem$$disp; 2059 relocInfo::relocType disp_reloc = $mem->disp_reloc(); 2060 encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc); 2061 %} 2062 2063 enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem 2064 int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg); // Hi register of pair, computed from lo 2065 int base = $mem$$base; 2066 int index = $mem$$index; 2067 int scale = $mem$$scale; 2068 int displace = $mem$$disp + 4; // Offset is 4 further in memory 2069 assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" ); 2070 encode_RegMem(masm, reg_encoding, base, index, scale, displace, relocInfo::none); 2071 %} 2072 2073 enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{ 2074 int r1, r2; 2075 if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW_ENC($dst$$reg); } 2076 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW_ENC($dst$$reg); } 2077 emit_opcode(masm,0x0F); 2078 emit_opcode(masm,$tertiary); 2079 emit_rm(masm, 0x3, r1, r2); 2080 emit_d8(masm,$cnt$$constant); 2081 emit_d8(masm,$primary); 2082 emit_rm(masm, 0x3, $secondary, r1); 2083 emit_d8(masm,$cnt$$constant); 2084 %} 2085 2086 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ 2087 emit_opcode( masm, 0x8B ); // Move 2088 emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg)); 2089 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2090 emit_d8(masm,$primary); 2091 emit_rm(masm, 0x3, $secondary, $dst$$reg); 2092 emit_d8(masm,$cnt$$constant-32); 2093 } 2094 emit_d8(masm,$primary); 2095 emit_rm(masm, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg)); 2096 emit_d8(masm,31); 2097 %} 2098 2099 enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{ 2100 int r1, r2; 2101 if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW_ENC($dst$$reg); } 2102 else { r2 = $dst$$reg; r1 = 
HIGH_FROM_LOW_ENC($dst$$reg); } 2103 2104 emit_opcode( masm, 0x8B ); // Move r1,r2 2105 emit_rm(masm, 0x3, r1, r2); 2106 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2107 emit_opcode(masm,$primary); 2108 emit_rm(masm, 0x3, $secondary, r1); 2109 emit_d8(masm,$cnt$$constant-32); 2110 } 2111 emit_opcode(masm,0x33); // XOR r2,r2 2112 emit_rm(masm, 0x3, r2, r2); 2113 %} 2114 2115 // Clone of RegMem but accepts an extra parameter to access each 2116 // half of a double in memory; it never needs relocation info. 2117 enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{ 2118 emit_opcode(masm,$opcode$$constant); 2119 int reg_encoding = $rm_reg$$reg; 2120 int base = $mem$$base; 2121 int index = $mem$$index; 2122 int scale = $mem$$scale; 2123 int displace = $mem$$disp + $disp_for_half$$constant; 2124 relocInfo::relocType disp_reloc = relocInfo::none; 2125 encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc); 2126 %} 2127 2128 // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!! 2129 // 2130 // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant 2131 // and it never needs relocation information. 2132 // Frequently used to move data between FPU's Stack Top and memory. 
2133 enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{ 2134 int rm_byte_opcode = $rm_opcode$$constant; 2135 int base = $mem$$base; 2136 int index = $mem$$index; 2137 int scale = $mem$$scale; 2138 int displace = $mem$$disp; 2139 assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" ); 2140 encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, relocInfo::none); 2141 %} 2142 2143 enc_class RMopc_Mem (immI rm_opcode, memory mem) %{ 2144 int rm_byte_opcode = $rm_opcode$$constant; 2145 int base = $mem$$base; 2146 int index = $mem$$index; 2147 int scale = $mem$$scale; 2148 int displace = $mem$$disp; 2149 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2150 encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc); 2151 %} 2152 2153 enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea 2154 int reg_encoding = $dst$$reg; 2155 int base = $src0$$reg; // 0xFFFFFFFF indicates no base 2156 int index = 0x04; // 0x04 indicates no index 2157 int scale = 0x00; // 0x00 indicates no scale 2158 int displace = $src1$$constant; // 0x00 indicates no displacement 2159 relocInfo::relocType disp_reloc = relocInfo::none; 2160 encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc); 2161 %} 2162 2163 enc_class min_enc (rRegI dst, rRegI src) %{ // MIN 2164 // Compare dst,src 2165 emit_opcode(masm,0x3B); 2166 emit_rm(masm, 0x3, $dst$$reg, $src$$reg); 2167 // jmp dst < src around move 2168 emit_opcode(masm,0x7C); 2169 emit_d8(masm,2); 2170 // move dst,src 2171 emit_opcode(masm,0x8B); 2172 emit_rm(masm, 0x3, $dst$$reg, $src$$reg); 2173 %} 2174 2175 enc_class max_enc (rRegI dst, rRegI src) %{ // MAX 2176 // Compare dst,src 2177 emit_opcode(masm,0x3B); 2178 emit_rm(masm, 0x3, $dst$$reg, $src$$reg); 2179 // jmp dst > src around move 2180 emit_opcode(masm,0x7F); 2181 emit_d8(masm,2); 2182 // move dst,src 2183 emit_opcode(masm,0x8B); 
2184 emit_rm(masm, 0x3, $dst$$reg, $src$$reg); 2185 %} 2186 2187 enc_class enc_FPR_store(memory mem, regDPR src) %{ 2188 // If src is FPR1, we can just FST to store it. 2189 // Else we need to FLD it to FPR1, then FSTP to store/pop it. 2190 int reg_encoding = 0x2; // Just store 2191 int base = $mem$$base; 2192 int index = $mem$$index; 2193 int scale = $mem$$scale; 2194 int displace = $mem$$disp; 2195 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2196 if( $src$$reg != FPR1L_enc ) { 2197 reg_encoding = 0x3; // Store & pop 2198 emit_opcode( masm, 0xD9 ); // FLD (i.e., push it) 2199 emit_d8( masm, 0xC0-1+$src$$reg ); 2200 } 2201 __ set_inst_mark(); // Mark start of opcode for reloc info in mem operand 2202 emit_opcode(masm,$primary); 2203 encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc); 2204 __ clear_inst_mark(); 2205 %} 2206 2207 enc_class neg_reg(rRegI dst) %{ 2208 // NEG $dst 2209 emit_opcode(masm,0xF7); 2210 emit_rm(masm, 0x3, 0x03, $dst$$reg ); 2211 %} 2212 2213 enc_class setLT_reg(eCXRegI dst) %{ 2214 // SETLT $dst 2215 emit_opcode(masm,0x0F); 2216 emit_opcode(masm,0x9C); 2217 emit_rm( masm, 0x3, 0x4, $dst$$reg ); 2218 %} 2219 2220 enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT 2221 int tmpReg = $tmp$$reg; 2222 2223 // SUB $p,$q 2224 emit_opcode(masm,0x2B); 2225 emit_rm(masm, 0x3, $p$$reg, $q$$reg); 2226 // SBB $tmp,$tmp 2227 emit_opcode(masm,0x1B); 2228 emit_rm(masm, 0x3, tmpReg, tmpReg); 2229 // AND $tmp,$y 2230 emit_opcode(masm,0x23); 2231 emit_rm(masm, 0x3, tmpReg, $y$$reg); 2232 // ADD $p,$tmp 2233 emit_opcode(masm,0x03); 2234 emit_rm(masm, 0x3, $p$$reg, tmpReg); 2235 %} 2236 2237 enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{ 2238 // TEST shift,32 2239 emit_opcode(masm,0xF7); 2240 emit_rm(masm, 0x3, 0, ECX_enc); 2241 emit_d32(masm,0x20); 2242 // JEQ,s small 2243 emit_opcode(masm, 0x74); 2244 emit_d8(masm, 0x04); 2245 // MOV 
$dst.hi,$dst.lo 2246 emit_opcode( masm, 0x8B ); 2247 emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg ); 2248 // CLR $dst.lo 2249 emit_opcode(masm, 0x33); 2250 emit_rm(masm, 0x3, $dst$$reg, $dst$$reg); 2251 // small: 2252 // SHLD $dst.hi,$dst.lo,$shift 2253 emit_opcode(masm,0x0F); 2254 emit_opcode(masm,0xA5); 2255 emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg)); 2256 // SHL $dst.lo,$shift" 2257 emit_opcode(masm,0xD3); 2258 emit_rm(masm, 0x3, 0x4, $dst$$reg ); 2259 %} 2260 2261 enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{ 2262 // TEST shift,32 2263 emit_opcode(masm,0xF7); 2264 emit_rm(masm, 0x3, 0, ECX_enc); 2265 emit_d32(masm,0x20); 2266 // JEQ,s small 2267 emit_opcode(masm, 0x74); 2268 emit_d8(masm, 0x04); 2269 // MOV $dst.lo,$dst.hi 2270 emit_opcode( masm, 0x8B ); 2271 emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2272 // CLR $dst.hi 2273 emit_opcode(masm, 0x33); 2274 emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg)); 2275 // small: 2276 // SHRD $dst.lo,$dst.hi,$shift 2277 emit_opcode(masm,0x0F); 2278 emit_opcode(masm,0xAD); 2279 emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg); 2280 // SHR $dst.hi,$shift" 2281 emit_opcode(masm,0xD3); 2282 emit_rm(masm, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) ); 2283 %} 2284 2285 enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ 2286 // TEST shift,32 2287 emit_opcode(masm,0xF7); 2288 emit_rm(masm, 0x3, 0, ECX_enc); 2289 emit_d32(masm,0x20); 2290 // JEQ,s small 2291 emit_opcode(masm, 0x74); 2292 emit_d8(masm, 0x05); 2293 // MOV $dst.lo,$dst.hi 2294 emit_opcode( masm, 0x8B ); 2295 emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2296 // SAR $dst.hi,31 2297 emit_opcode(masm, 0xC1); 2298 emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) ); 2299 emit_d8(masm, 0x1F ); 2300 // small: 2301 // SHRD $dst.lo,$dst.hi,$shift 2302 emit_opcode(masm,0x0F); 2303 emit_opcode(masm,0xAD); 2304 emit_rm(masm, 0x3, 
HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg); 2305 // SAR $dst.hi,$shift" 2306 emit_opcode(masm,0xD3); 2307 emit_rm(masm, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) ); 2308 %} 2309 2310 2311 // ----------------- Encodings for floating point unit ----------------- 2312 // May leave result in FPU-TOS or FPU reg depending on opcodes 2313 enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV 2314 $$$emit8$primary; 2315 emit_rm(masm, 0x3, $secondary, $src$$reg ); 2316 %} 2317 2318 // Pop argument in FPR0 with FSTP ST(0) 2319 enc_class PopFPU() %{ 2320 emit_opcode( masm, 0xDD ); 2321 emit_d8( masm, 0xD8 ); 2322 %} 2323 2324 // !!!!! equivalent to Pop_Reg_F 2325 enc_class Pop_Reg_DPR( regDPR dst ) %{ 2326 emit_opcode( masm, 0xDD ); // FSTP ST(i) 2327 emit_d8( masm, 0xD8+$dst$$reg ); 2328 %} 2329 2330 enc_class Push_Reg_DPR( regDPR dst ) %{ 2331 emit_opcode( masm, 0xD9 ); 2332 emit_d8( masm, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 2333 %} 2334 2335 enc_class strictfp_bias1( regDPR dst ) %{ 2336 emit_opcode( masm, 0xDB ); // FLD m80real 2337 emit_opcode( masm, 0x2D ); 2338 emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() ); 2339 emit_opcode( masm, 0xDE ); // FMULP ST(dst), ST0 2340 emit_opcode( masm, 0xC8+$dst$$reg ); 2341 %} 2342 2343 enc_class strictfp_bias2( regDPR dst ) %{ 2344 emit_opcode( masm, 0xDB ); // FLD m80real 2345 emit_opcode( masm, 0x2D ); 2346 emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() ); 2347 emit_opcode( masm, 0xDE ); // FMULP ST(dst), ST0 2348 emit_opcode( masm, 0xC8+$dst$$reg ); 2349 %} 2350 2351 // Special case for moving an integer register to a stack slot. 2352 enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2353 store_to_stackslot( masm, $primary, $src$$reg, $dst$$disp ); 2354 %} 2355 2356 // Special case for moving a register to a stack slot. 
2357 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2358 // Opcode already emitted 2359 emit_rm( masm, 0x02, $src$$reg, ESP_enc ); // R/M byte 2360 emit_rm( masm, 0x00, ESP_enc, ESP_enc); // SIB byte 2361 emit_d32(masm, $dst$$disp); // Displacement 2362 %} 2363 2364 // Push the integer in stackSlot 'src' onto FP-stack 2365 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2366 store_to_stackslot( masm, $primary, $secondary, $src$$disp ); 2367 %} 2368 2369 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2370 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2371 store_to_stackslot( masm, 0xD9, 0x03, $dst$$disp ); 2372 %} 2373 2374 // Same as Pop_Mem_F except for opcode 2375 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2376 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2377 store_to_stackslot( masm, 0xDD, 0x03, $dst$$disp ); 2378 %} 2379 2380 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2381 emit_opcode( masm, 0xDD ); // FSTP ST(i) 2382 emit_d8( masm, 0xD8+$dst$$reg ); 2383 %} 2384 2385 enc_class Push_Reg_FPR( regFPR dst ) %{ 2386 emit_opcode( masm, 0xD9 ); // FLD ST(i-1) 2387 emit_d8( masm, 0xC0-1+$dst$$reg ); 2388 %} 2389 2390 // Push FPU's float to a stack-slot, and pop FPU-stack 2391 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2392 int pop = 0x02; 2393 if ($src$$reg != FPR1L_enc) { 2394 emit_opcode( masm, 0xD9 ); // FLD ST(i-1) 2395 emit_d8( masm, 0xC0-1+$src$$reg ); 2396 pop = 0x03; 2397 } 2398 store_to_stackslot( masm, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2399 %} 2400 2401 // Push FPU's double to a stack-slot, and pop FPU-stack 2402 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2403 int pop = 0x02; 2404 if ($src$$reg != FPR1L_enc) { 2405 emit_opcode( masm, 0xD9 ); // FLD ST(i-1) 2406 emit_d8( masm, 0xC0-1+$src$$reg ); 2407 pop = 0x03; 2408 } 2409 store_to_stackslot( masm, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2410 %} 2411 2412 // Push FPU's double to 
a FPU-stack-slot, and pop FPU-stack 2413 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 2414 int pop = 0xD0 - 1; // -1 since we skip FLD 2415 if ($src$$reg != FPR1L_enc) { 2416 emit_opcode( masm, 0xD9 ); // FLD ST(src-1) 2417 emit_d8( masm, 0xC0-1+$src$$reg ); 2418 pop = 0xD8; 2419 } 2420 emit_opcode( masm, 0xDD ); 2421 emit_d8( masm, pop+$dst$$reg ); // FST<P> ST(i) 2422 %} 2423 2424 2425 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 2426 // load dst in FPR0 2427 emit_opcode( masm, 0xD9 ); 2428 emit_d8( masm, 0xC0-1+$dst$$reg ); 2429 if ($src$$reg != FPR1L_enc) { 2430 // fincstp 2431 emit_opcode (masm, 0xD9); 2432 emit_opcode (masm, 0xF7); 2433 // swap src with FPR1: 2434 // FXCH FPR1 with src 2435 emit_opcode(masm, 0xD9); 2436 emit_d8(masm, 0xC8-1+$src$$reg ); 2437 // fdecstp 2438 emit_opcode (masm, 0xD9); 2439 emit_opcode (masm, 0xF6); 2440 } 2441 %} 2442 2443 enc_class Push_ModD_encoding(regD src0, regD src1) %{ 2444 __ subptr(rsp, 8); 2445 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2446 __ fld_d(Address(rsp, 0)); 2447 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2448 __ fld_d(Address(rsp, 0)); 2449 %} 2450 2451 enc_class Push_ModF_encoding(regF src0, regF src1) %{ 2452 __ subptr(rsp, 4); 2453 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2454 __ fld_s(Address(rsp, 0)); 2455 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2456 __ fld_s(Address(rsp, 0)); 2457 %} 2458 2459 enc_class Push_ResultD(regD dst) %{ 2460 __ fstp_d(Address(rsp, 0)); 2461 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2462 __ addptr(rsp, 8); 2463 %} 2464 2465 enc_class Push_ResultF(regF dst, immI d8) %{ 2466 __ fstp_s(Address(rsp, 0)); 2467 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2468 __ addptr(rsp, $d8$$constant); 2469 %} 2470 2471 enc_class Push_SrcD(regD src) %{ 2472 __ subptr(rsp, 8); 2473 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2474 __ fld_d(Address(rsp, 0)); 2475 %} 2476 2477 enc_class push_stack_temp_qword() %{ 2478 __ subptr(rsp, 8); 2479 %} 
  // Pop the 8-byte temporary slot used by the push_xmm_to_fpr1 sequence below.
  enc_class pop_stack_temp_qword() %{
    __ addptr(rsp, 8);
  %}

  // Spill an XMM double to the scratch stack slot and reload it onto the
  // x87 stack top (FPR1), bridging SSE and x87 register files.
  // NOTE(review): assumes 8 bytes below RSP were reserved by the caller
  // (paired with pop_stack_temp_qword) -- confirm at the instruct sites.
  enc_class push_xmm_to_fpr1(regD src) %{
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Rotate the x87 stack so that $src becomes the top-of-stack result,
  // without disturbing the other stack entries: fincstp / fxch / fdecstp.
  // No-op when $src is already FPR1.
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (masm, 0xD9);
      emit_opcode (masm, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(masm, 0xD9);
      emit_d8(masm, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (masm, 0xD9);
      emit_opcode (masm, 0xF6);
    }
  %}

  // Copy the FPU condition codes into EFLAGS (fnstsw ax; sahf) and skip the
  // following 5 bytes when the comparison was ordered (PF clear).
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( masm, 0xDF );
    emit_opcode( masm, 0xE0 );
    // sahf
    emit_opcode( masm, 0x9E );
    // jnp ::skip
    emit_opcode( masm, 0x7B );
    emit_opcode( masm, 0x05 );
  %}

  // x87 remainder loop: FPREM only reduces the exponent partially, so it
  // must be repeated until the status word's C2 bit (incomplete reduction,
  // surfaced as PF after sahf) is clear.  The trailing bytes are a 32-bit
  // JP back to the FPREM (rel32 = -12 = 0xFFFFFFF4).
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( masm, 0xD9 );
    emit_opcode( masm, 0xF8 );
    // wait
    emit_opcode( masm, 0x9b );
    // fnstsw ax
    emit_opcode( masm, 0xDF );
    emit_opcode( masm, 0xE0 );
    // sahf
    emit_opcode( masm, 0x9E );
    // jp ::loop
    emit_opcode( masm, 0x0F );
    emit_opcode( masm, 0x8A );
    emit_opcode( masm, 0xF4 );
    emit_opcode( masm, 0xFF );
    emit_opcode( masm, 0xFF );
    emit_opcode( masm, 0xFF );
  %}

  // Materialize integer flags after an x87 compare.  Tests bit 10 (C2) of
  // the FPU status word to detect an unordered result; if unordered, forces
  // the LT outcome by loading AH=1 (carry) before SAHF.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( masm, 0xDF);
    emit_opcode( masm, 0xE0);
    // test ax,0x0400
    emit_opcode( masm, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( masm, 0xA9 );
    emit_d16   ( masm, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( masm, 0xA9 );
    // emit_d32   ( masm, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( masm, 0x74 );
    emit_d8    ( masm, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( masm, 0xB4 );
    emit_d8    ( masm, 0x01 );
    // sahf
    emit_opcode( masm, 0x9E);
  %}

  // After a P6 (FUCOMI-style) float compare: if the result was unordered
  // (PF set by NaN), rewrite the flags to the LT outcome.  Branch distance
  // 0x03 skips the MOV AH,1 (2 bytes) + SAHF (1 byte) to land on the NOP.
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( masm, 0x7B );
    emit_d8    ( masm, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( masm, 0xB4 );
    emit_d8    ( masm, 0x01 );
    // SAHF
    emit_opcode( masm, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( masm, 0x90);
  %}

  // Pseudo-code for the sequence emitted by CmpF_Result below:
  //   fnstsw_ax();
  //   sahf();
  //   movl(dst, nan_result);
  //   jcc(Assembler::parity, exit);
  //   movl(dst, less_result);
  //   jcc(Assembler::below, exit);
  //   movl(dst, equal_result);
  //   jcc(Assembler::equal, exit);
  //   movl(dst, greater_result);

  // Result constants as actually emitted by the emit_d32 calls below:
  //   less_result     = -1;
  //   greater_result  =  1;
  //   equal_result    =  0;
  //   nan_result      = -1;

  // Turn x87 compare flags into a three-way int in $dst (-1/0/1, NaN -> -1).
  // The jcc displacements are byte counts over the remaining MOV/Jcc pairs:
  // JP skips 0x13 (5+2+5+2+5) bytes, JB skips 0x0C (5+2+5), JE skips 0x05.
  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( masm, 0xDF);
    emit_opcode( masm, 0xE0);
    // sahf
    emit_opcode( masm, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( masm, 0xB8 + $dst$$reg);
    emit_d32( masm, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( masm, 0x7A );
    emit_d8    ( masm, 0x13 );
    // movl(dst, less_result);
    emit_opcode( masm, 0xB8 + $dst$$reg);
    emit_d32( masm, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( masm, 0x72 );
    emit_d8    ( masm, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( masm, 0xB8 + $dst$$reg);
    emit_d32( masm, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( masm, 0x74 );
    emit_d8    ( masm, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( masm, 0xB8 + $dst$$reg);
    emit_d32( masm, 1 );
  %}

  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP $src1.hi,$src2.hi
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
    // JNE,s done
    emit_opcode(masm,0x75);
    emit_d8(masm, 2 );
    // CMP $src1.lo,$src2.lo
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}

  // Sign-extend a 32-bit int into a long register pair:
  // lo = src, hi = src, then SAR hi,31 replicates the sign bit.
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( masm, dst_encoding  , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( masm, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( masm, 0xC1 );
    emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
    emit_d8(masm, 0x1F );
  %}

  // long -> double: push the 64-bit value (hi then lo, so it is little-endian
  // in memory), FILD qword off [ESP], then pop the 8 bytes back off.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(masm, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(masm,0xdf);
    emit_d8(masm, 0x6C);
    emit_d8(masm, 0x24);
    emit_d8(masm, 0x00);
    // pop stack
    emit_opcode(masm, 0x83); // add SP, #8
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, 0x8);
  %}

  // (int64)(src1 * con) >> cnt for 32 <= cnt <= 63: the widening IMUL leaves
  // the high 32 bits in EDX, so only a SAR by (cnt-32) on EDX is needed
  // (omitted entirely when cnt == 32).
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL EDX:EAX,$src1
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x5, $src1$$reg );
    // SAR EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(masm, 0xC1);
      emit_rm(masm, 0x3, 7, $dst$$reg );
      emit_d8(masm, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  // Same as convert_long_double but leaves the 8 pushed bytes on the stack
  // for the caller to release (e.g. via pop_stack_temp_qword).
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(masm, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(masm,0xdf);
    emit_d8(masm, 0x6C);
    emit_d8(masm, 0x24);
    emit_d8(masm, 0x00);
  %}

  // Widening signed multiply: EDX:EAX = (long)EAX * (long)src.
  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x5, $src$$reg);
  %}

  // Widening unsigned multiply: EDX:EAX = (uint64)EAX * (uint64)src.
  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x4, $src$$reg);
  %}

  // Full 64x64 -> 64 multiply on a 32-bit machine via three 32-bit products.
  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( masm, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( masm, 0x0F );
    emit_opcode( masm, 0xAF );
    emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( masm, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( masm, 0x0F );
    emit_opcode( masm, 0xAF );
    emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( masm, 0x03 );
    emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x4, $src$$reg );
    // ADD    EDX,ESI
    emit_opcode( masm, 0x03 );
    emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
  %}

  // Multiply a long by a small (0..127) constant; the 8-bit-immediate IMUL
  // form (0x6B) keeps the encoding short.
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( masm, 0x6B );
    emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    emit_d8( masm, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(masm, 0xB8 + EDX_enc);
    emit_d32( masm, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x4, EDX_enc );
    // ADD    EDX,ESI
    emit_opcode( masm, 0x03 );
    emit_rm( masm, 0x3, EDX_enc, $tmp$$reg );
  %}

  // 64-bit signed divide: push both operands and call SharedRuntime::ldiv,
  // then restore the 16 bytes of outgoing args.
  // NOTE(review): HIGH_FROM_LOW_ENC is applied to the whole PUSH opcode byte
  // (0x50+reg); this matches 0x50+HIGH_FROM_LOW_ENC(reg) only if the macro is
  // a plain additive offset -- confirm against the macro's definition.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(masm,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(masm,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);       // Call into runtime
    emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Restore stack
    emit_opcode(masm, 0x83); // add SP, #framesize
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, 4*4);
  %}

  // 64-bit signed remainder: identical shape to long_div but calls
  // SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(masm,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(masm,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);       // Call into runtime
    emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Restore stack
    emit_opcode(masm, 0x83); // add SP, #framesize
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, 4*4);
  %}

  // Compare a long against zero for EQ/NE: OR the halves together into $tmp
  // so ZF reflects whether the full 64-bit value is zero.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(masm, 0x8B);
    emit_rm(masm, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(masm, 0x0B);
    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
  %}

  // Long EQ/NE compare of two register pairs: compare lo halves, short-jump
  // over the hi compare when they already differ (flags then hold NE).
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(masm, 0x70, 0x5);
    emit_d8(masm,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
  %}

  // Signed long compare via subtract-with-borrow: CMP the lo halves, then
  // SBB the hi halves through $tmp so the final flags order the 64-bit values.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
    // SBB   $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( masm, 0x1B );
    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
  %}

  // Signed compare of a long against zero: 0 - src via CMP/SBB from a
  // zeroed $tmp, leaving ordering flags for the full 64-bit value.
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(masm,0x33);  // XOR
    emit_rm(masm,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( masm, 0x1B );
    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
  %}

  // Sniff, sniff... smells like Gnu Superoptimizer
  // 64-bit negate in place: NEG hi; NEG lo; SBB hi,0 propagates the borrow
  // from the low half into the already-negated high half.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(masm,0xF7);    // NEG hi
    emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_opcode(masm,0xF7);    // NEG lo
    emit_rm    (masm,0x3, 0x3,  $dst$$reg );
    emit_opcode(masm,0x83);    // SBB hi,0
    emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_d8    (masm,0 );
  %}

  // POP EDX (single-byte encoding 0x5A).
  enc_class enc_pop_rdx() %{
    emit_opcode(masm,0x5A);
  %}

  // Tail-jump to the shared rethrow stub; relocated as a runtime call.
  enc_class enc_rethrow() %{
    __ set_inst_mark();
    emit_opcode(masm, 0xE9);        // jmp    entry
    emit_d32_reloc(masm, (int)OptoRuntime::rethrow_stub() - ((int)__ pc())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // thrown an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(masm,0xD9);            // FLDCW  trunc
    emit_opcode(masm,0x2D);
    emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(masm,0x83);            // SUB ESP,4
    emit_opcode(masm,0xEC);
    emit_d8(masm,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(masm,0xDB);            // FISTP [ESP]
    emit_opcode(masm,0x1C);
    emit_d8(masm,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(masm,0xD9);            // FLDCW   std/24-bit mode
    emit_opcode(masm,0x2D);
    emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(masm,0x58);       // POP EAX
    emit_opcode(masm,0x3D);       // CMP EAX,imm
    emit_d32   (masm,0x80000000); // 0x80000000 is the x87 "integer indefinite" marker
    emit_opcode(masm,0x75);       // JNE around_slow_call
    emit_d8    (masm,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(masm,0xD9 );      // FLD     ST(i)
    emit_d8    (masm,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);       // Call into runtime
    emit_d32_reloc(masm, (StubRoutines::x86::d2i_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Carry on here...
  %}

  // double -> long, same strategy as DPR2I_encoding but with an 8-byte slot,
  // a 64-bit FISTP, and a two-register (EDX:EAX) check for the indefinite
  // value 0x8000000000000000 before falling into the d2l_wrapper slow path.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(masm,0xD9);            // FLDCW  trunc
    emit_opcode(masm,0x2D);
    emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(masm,0x83);            // SUB ESP,8
    emit_opcode(masm,0xEC);
    emit_d8(masm,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(masm,0xDF);            // FISTP [ESP]
    emit_opcode(masm,0x3C);
    emit_d8(masm,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(masm,0xD9);            // FLDCW   std/24-bit mode
    emit_opcode(masm,0x2D);
    emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(masm,0x58);       // POP EAX
    emit_opcode(masm,0x5A);       // POP EDX
    emit_opcode(masm,0x81);       // CMP EDX,imm
    emit_d8    (masm,0xFA);       // rdx
    emit_d32   (masm,0x80000000); //         0x80000000
    emit_opcode(masm,0x75);       // JNE around_slow_call
    emit_d8    (masm,0x07+4);     // Size of slow_call
    emit_opcode(masm,0x85);       // TEST EAX,EAX
    emit_opcode(masm,0xC0);       // 2/rax,/rax,
    emit_opcode(masm,0x75);       // JNE around_slow_call
    emit_d8    (masm,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(masm,0xD9 );      // FLD     ST(i)
    emit_d8    (masm,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);       // Call into runtime
    emit_d32_reloc(masm, (StubRoutines::x86::d2l_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Carry on here...
  %}

  // FMUL ST,ST(i): multiply stack top by $src1 in place.
  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC8 + $src1$$reg);
  %}

  // FADD ST,ST(i): add $src2 into stack top (non-popping form).
  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADDP  ST,src2  /* D8 C0+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  // FADDP ST(i),ST: add stack top into $src2 and pop the x87 stack.
  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(masm, 0xDE);
    emit_opcode(masm, 0xC0 + $src2$$reg);
  %}

  // Fused subtract-then-divide on the x87 stack top:
  // ST = (ST - src1) / src2.
  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xF0 + $src2$$reg);
  %}

  // ST = (ST + src1) * src2, leaving the result on the stack top.
  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC8 + $src2$$reg);
  %}

  // src2 = src2 * (ST + src1), popping the x87 stack (FMULP form).
  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(masm, 0xDE);
    emit_opcode(masm, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  // FILD qword [mem] (DF /5) reads all 64 bits in one access, then the value
  // is spilled to the destination stack slot with a 64-bit FISTP (DF /7).
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(masm,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( masm, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( masm, 0x0DF, 0x05, $src$$disp );
    __ set_inst_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(masm,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    __ clear_inst_mark();
  %}

%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF    +--------+
//        |      |  pad2  | 2
//        |      +--------+ 1
//        |      | locks  | 0
//        |      +--------+----> OptoReg::stack0(), even aligned
//        |      |  pad1  | 11   pad to align new SP
//        |      +--------+
//        |      |        | 10
//        |      | spills |  9   spills
//        V      |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//             ^ |  out   |  7
//             | |  args  |  6   Holes in outgoing args owned by CALLEE
//  Owned by   +--------+
//   CALLEE    | new out|  6   Empty on Intel, window on Sparc
//   |    new  |preserve|      Must be even-aligned.
//   |     SP-+--------+----> Matcher::_new_SP, even aligned
//   |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of C & interpreter return values
  // Indexed by ideal register number; lo/hi give the OptoReg pair.
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // Java convention: floats go to XMM0 with UseSSE>=1, doubles need UseSSE>=2;
  // otherwise results come back on the x87 stack (FPR1).
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib
ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes (shift amounts 0-3 in SIB bytes)
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Signed 8-bit immediate (fits the short imm8 instruction forms)
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned 8-bit immediate
operand immU8() %{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Signed 16-bit immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in the 1-31 range (shifts that stay within one 32-bit half)
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in the 32-63 range (long shifts that cross the register pair)
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Null Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate -1
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value representable as a signed 32-bit int
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (x87 path, UseSSE<=1)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 path)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (x87 path, UseSSE == 0)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE path)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Opmask register operand (AVX-512 vector mask)
operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register (registers with byte-addressable low halves)
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Any integer register except EAX and EDX (which widening MUL/IMUL clobber)
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Any integer register except ECX (which variable shifts require)
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(esi_reg));
   match(reg);
   match(rRegI);

   format %{ "ESI" %}
   interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand rRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
3705 match(eAXRegP); 3706 match(eBXRegP); 3707 match(eCXRegP); 3708 match(eDIRegP); 3709 3710 format %{ %} 3711 interface(REG_INTER); 3712 %} 3713 3714 // On windows95, EBP is not safe to use for implicit null tests. 3715 operand eRegP_no_EBP() %{ 3716 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3717 match(RegP); 3718 match(eAXRegP); 3719 match(eBXRegP); 3720 match(eCXRegP); 3721 match(eDIRegP); 3722 3723 op_cost(100); 3724 format %{ %} 3725 interface(REG_INTER); 3726 %} 3727 3728 operand pRegP() %{ 3729 constraint(ALLOC_IN_RC(p_reg)); 3730 match(RegP); 3731 match(eBXRegP); 3732 match(eDXRegP); 3733 match(eSIRegP); 3734 match(eDIRegP); 3735 3736 format %{ %} 3737 interface(REG_INTER); 3738 %} 3739 3740 // Special Registers 3741 // Return a pointer value 3742 operand eAXRegP(eRegP reg) %{ 3743 constraint(ALLOC_IN_RC(eax_reg)); 3744 match(reg); 3745 format %{ "EAX" %} 3746 interface(REG_INTER); 3747 %} 3748 3749 // Used in AtomicAdd 3750 operand eBXRegP(eRegP reg) %{ 3751 constraint(ALLOC_IN_RC(ebx_reg)); 3752 match(reg); 3753 format %{ "EBX" %} 3754 interface(REG_INTER); 3755 %} 3756 3757 // Tail-call (interprocedural jump) to interpreter 3758 operand eCXRegP(eRegP reg) %{ 3759 constraint(ALLOC_IN_RC(ecx_reg)); 3760 match(reg); 3761 format %{ "ECX" %} 3762 interface(REG_INTER); 3763 %} 3764 3765 operand eDXRegP(eRegP reg) %{ 3766 constraint(ALLOC_IN_RC(edx_reg)); 3767 match(reg); 3768 format %{ "EDX" %} 3769 interface(REG_INTER); 3770 %} 3771 3772 operand eSIRegP(eRegP reg) %{ 3773 constraint(ALLOC_IN_RC(esi_reg)); 3774 match(reg); 3775 format %{ "ESI" %} 3776 interface(REG_INTER); 3777 %} 3778 3779 // Used in rep stosw 3780 operand eDIRegP(eRegP reg) %{ 3781 constraint(ALLOC_IN_RC(edi_reg)); 3782 match(reg); 3783 format %{ "EDI" %} 3784 interface(REG_INTER); 3785 %} 3786 3787 operand eRegL() %{ 3788 constraint(ALLOC_IN_RC(long_reg)); 3789 match(RegL); 3790 match(eADXRegL); 3791 3792 format %{ %} 3793 interface(REG_INTER); 3794 %} 3795 3796 operand eADXRegL( eRegL reg 
) %{ 3797 constraint(ALLOC_IN_RC(eadx_reg)); 3798 match(reg); 3799 3800 format %{ "EDX:EAX" %} 3801 interface(REG_INTER); 3802 %} 3803 3804 operand eBCXRegL( eRegL reg ) %{ 3805 constraint(ALLOC_IN_RC(ebcx_reg)); 3806 match(reg); 3807 3808 format %{ "EBX:ECX" %} 3809 interface(REG_INTER); 3810 %} 3811 3812 operand eBDPRegL( eRegL reg ) %{ 3813 constraint(ALLOC_IN_RC(ebpd_reg)); 3814 match(reg); 3815 3816 format %{ "EBP:EDI" %} 3817 interface(REG_INTER); 3818 %} 3819 // Special case for integer high multiply 3820 operand eADXRegL_low_only() %{ 3821 constraint(ALLOC_IN_RC(eadx_reg)); 3822 match(RegL); 3823 3824 format %{ "EAX" %} 3825 interface(REG_INTER); 3826 %} 3827 3828 // Flags register, used as output of compare instructions 3829 operand rFlagsReg() %{ 3830 constraint(ALLOC_IN_RC(int_flags)); 3831 match(RegFlags); 3832 3833 format %{ "EFLAGS" %} 3834 interface(REG_INTER); 3835 %} 3836 3837 // Flags register, used as output of compare instructions 3838 operand eFlagsReg() %{ 3839 constraint(ALLOC_IN_RC(int_flags)); 3840 match(RegFlags); 3841 3842 format %{ "EFLAGS" %} 3843 interface(REG_INTER); 3844 %} 3845 3846 // Flags register, used as output of FLOATING POINT compare instructions 3847 operand eFlagsRegU() %{ 3848 constraint(ALLOC_IN_RC(int_flags)); 3849 match(RegFlags); 3850 3851 format %{ "EFLAGS_U" %} 3852 interface(REG_INTER); 3853 %} 3854 3855 operand eFlagsRegUCF() %{ 3856 constraint(ALLOC_IN_RC(int_flags)); 3857 match(RegFlags); 3858 predicate(false); 3859 3860 format %{ "EFLAGS_U_CF" %} 3861 interface(REG_INTER); 3862 %} 3863 3864 // Condition Code Register used by long compare 3865 operand flagsReg_long_LTGE() %{ 3866 constraint(ALLOC_IN_RC(int_flags)); 3867 match(RegFlags); 3868 format %{ "FLAGS_LTGE" %} 3869 interface(REG_INTER); 3870 %} 3871 operand flagsReg_long_EQNE() %{ 3872 constraint(ALLOC_IN_RC(int_flags)); 3873 match(RegFlags); 3874 format %{ "FLAGS_EQNE" %} 3875 interface(REG_INTER); 3876 %} 3877 operand flagsReg_long_LEGT() %{ 3878 
constraint(ALLOC_IN_RC(int_flags)); 3879 match(RegFlags); 3880 format %{ "FLAGS_LEGT" %} 3881 interface(REG_INTER); 3882 %} 3883 3884 // Condition Code Register used by unsigned long compare 3885 operand flagsReg_ulong_LTGE() %{ 3886 constraint(ALLOC_IN_RC(int_flags)); 3887 match(RegFlags); 3888 format %{ "FLAGS_U_LTGE" %} 3889 interface(REG_INTER); 3890 %} 3891 operand flagsReg_ulong_EQNE() %{ 3892 constraint(ALLOC_IN_RC(int_flags)); 3893 match(RegFlags); 3894 format %{ "FLAGS_U_EQNE" %} 3895 interface(REG_INTER); 3896 %} 3897 operand flagsReg_ulong_LEGT() %{ 3898 constraint(ALLOC_IN_RC(int_flags)); 3899 match(RegFlags); 3900 format %{ "FLAGS_U_LEGT" %} 3901 interface(REG_INTER); 3902 %} 3903 3904 // Float register operands 3905 operand regDPR() %{ 3906 predicate( UseSSE < 2 ); 3907 constraint(ALLOC_IN_RC(fp_dbl_reg)); 3908 match(RegD); 3909 match(regDPR1); 3910 match(regDPR2); 3911 format %{ %} 3912 interface(REG_INTER); 3913 %} 3914 3915 operand regDPR1(regDPR reg) %{ 3916 predicate( UseSSE < 2 ); 3917 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 3918 match(reg); 3919 format %{ "FPR1" %} 3920 interface(REG_INTER); 3921 %} 3922 3923 operand regDPR2(regDPR reg) %{ 3924 predicate( UseSSE < 2 ); 3925 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 3926 match(reg); 3927 format %{ "FPR2" %} 3928 interface(REG_INTER); 3929 %} 3930 3931 operand regnotDPR1(regDPR reg) %{ 3932 predicate( UseSSE < 2 ); 3933 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 3934 match(reg); 3935 format %{ %} 3936 interface(REG_INTER); 3937 %} 3938 3939 // Float register operands 3940 operand regFPR() %{ 3941 predicate( UseSSE < 2 ); 3942 constraint(ALLOC_IN_RC(fp_flt_reg)); 3943 match(RegF); 3944 match(regFPR1); 3945 format %{ %} 3946 interface(REG_INTER); 3947 %} 3948 3949 // Float register operands 3950 operand regFPR1(regFPR reg) %{ 3951 predicate( UseSSE < 2 ); 3952 constraint(ALLOC_IN_RC(fp_flt_reg0)); 3953 match(reg); 3954 format %{ "FPR1" %} 3955 interface(REG_INTER); 3956 %} 3957 3958 // XMM Float register 
operands 3959 operand regF() %{ 3960 predicate( UseSSE>=1 ); 3961 constraint(ALLOC_IN_RC(float_reg_legacy)); 3962 match(RegF); 3963 format %{ %} 3964 interface(REG_INTER); 3965 %} 3966 3967 operand legRegF() %{ 3968 predicate( UseSSE>=1 ); 3969 constraint(ALLOC_IN_RC(float_reg_legacy)); 3970 match(RegF); 3971 format %{ %} 3972 interface(REG_INTER); 3973 %} 3974 3975 // Float register operands 3976 operand vlRegF() %{ 3977 constraint(ALLOC_IN_RC(float_reg_vl)); 3978 match(RegF); 3979 3980 format %{ %} 3981 interface(REG_INTER); 3982 %} 3983 3984 // XMM Double register operands 3985 operand regD() %{ 3986 predicate( UseSSE>=2 ); 3987 constraint(ALLOC_IN_RC(double_reg_legacy)); 3988 match(RegD); 3989 format %{ %} 3990 interface(REG_INTER); 3991 %} 3992 3993 // Double register operands 3994 operand legRegD() %{ 3995 predicate( UseSSE>=2 ); 3996 constraint(ALLOC_IN_RC(double_reg_legacy)); 3997 match(RegD); 3998 format %{ %} 3999 interface(REG_INTER); 4000 %} 4001 4002 operand vlRegD() %{ 4003 constraint(ALLOC_IN_RC(double_reg_vl)); 4004 match(RegD); 4005 4006 format %{ %} 4007 interface(REG_INTER); 4008 %} 4009 4010 //----------Memory Operands---------------------------------------------------- 4011 // Direct Memory Operand 4012 operand direct(immP addr) %{ 4013 match(addr); 4014 4015 format %{ "[$addr]" %} 4016 interface(MEMORY_INTER) %{ 4017 base(0xFFFFFFFF); 4018 index(0x4); 4019 scale(0x0); 4020 disp($addr); 4021 %} 4022 %} 4023 4024 // Indirect Memory Operand 4025 operand indirect(eRegP reg) %{ 4026 constraint(ALLOC_IN_RC(int_reg)); 4027 match(reg); 4028 4029 format %{ "[$reg]" %} 4030 interface(MEMORY_INTER) %{ 4031 base($reg); 4032 index(0x4); 4033 scale(0x0); 4034 disp(0x0); 4035 %} 4036 %} 4037 4038 // Indirect Memory Plus Short Offset Operand 4039 operand indOffset8(eRegP reg, immI8 off) %{ 4040 match(AddP reg off); 4041 4042 format %{ "[$reg + $off]" %} 4043 interface(MEMORY_INTER) %{ 4044 base($reg); 4045 index(0x4); 4046 scale(0x0); 4047 disp($off); 4048 %} 
4049 %} 4050 4051 // Indirect Memory Plus Long Offset Operand 4052 operand indOffset32(eRegP reg, immI off) %{ 4053 match(AddP reg off); 4054 4055 format %{ "[$reg + $off]" %} 4056 interface(MEMORY_INTER) %{ 4057 base($reg); 4058 index(0x4); 4059 scale(0x0); 4060 disp($off); 4061 %} 4062 %} 4063 4064 // Indirect Memory Plus Long Offset Operand 4065 operand indOffset32X(rRegI reg, immP off) %{ 4066 match(AddP off reg); 4067 4068 format %{ "[$reg + $off]" %} 4069 interface(MEMORY_INTER) %{ 4070 base($reg); 4071 index(0x4); 4072 scale(0x0); 4073 disp($off); 4074 %} 4075 %} 4076 4077 // Indirect Memory Plus Index Register Plus Offset Operand 4078 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4079 match(AddP (AddP reg ireg) off); 4080 4081 op_cost(10); 4082 format %{"[$reg + $off + $ireg]" %} 4083 interface(MEMORY_INTER) %{ 4084 base($reg); 4085 index($ireg); 4086 scale(0x0); 4087 disp($off); 4088 %} 4089 %} 4090 4091 // Indirect Memory Plus Index Register Plus Offset Operand 4092 operand indIndex(eRegP reg, rRegI ireg) %{ 4093 match(AddP reg ireg); 4094 4095 op_cost(10); 4096 format %{"[$reg + $ireg]" %} 4097 interface(MEMORY_INTER) %{ 4098 base($reg); 4099 index($ireg); 4100 scale(0x0); 4101 disp(0x0); 4102 %} 4103 %} 4104 4105 // // ------------------------------------------------------------------------- 4106 // // 486 architecture doesn't support "scale * index + offset" with out a base 4107 // // ------------------------------------------------------------------------- 4108 // // Scaled Memory Operands 4109 // // Indirect Memory Times Scale Plus Offset Operand 4110 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4111 // match(AddP off (LShiftI ireg scale)); 4112 // 4113 // op_cost(10); 4114 // format %{"[$off + $ireg << $scale]" %} 4115 // interface(MEMORY_INTER) %{ 4116 // base(0x4); 4117 // index($ireg); 4118 // scale($scale); 4119 // disp($off); 4120 // %} 4121 // %} 4122 4123 // Indirect Memory Times Scale Plus Index Register 4124 
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ 4125 match(AddP reg (LShiftI ireg scale)); 4126 4127 op_cost(10); 4128 format %{"[$reg + $ireg << $scale]" %} 4129 interface(MEMORY_INTER) %{ 4130 base($reg); 4131 index($ireg); 4132 scale($scale); 4133 disp(0x0); 4134 %} 4135 %} 4136 4137 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4138 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ 4139 match(AddP (AddP reg (LShiftI ireg scale)) off); 4140 4141 op_cost(10); 4142 format %{"[$reg + $off + $ireg << $scale]" %} 4143 interface(MEMORY_INTER) %{ 4144 base($reg); 4145 index($ireg); 4146 scale($scale); 4147 disp($off); 4148 %} 4149 %} 4150 4151 //----------Load Long Memory Operands------------------------------------------ 4152 // The load-long idiom will use it's address expression again after loading 4153 // the first word of the long. If the load-long destination overlaps with 4154 // registers used in the addressing expression, the 2nd half will be loaded 4155 // from a clobbered address. Fix this by requiring that load-long use 4156 // address registers that do not overlap with the load-long target. 
4157 4158 // load-long support 4159 operand load_long_RegP() %{ 4160 constraint(ALLOC_IN_RC(esi_reg)); 4161 match(RegP); 4162 match(eSIRegP); 4163 op_cost(100); 4164 format %{ %} 4165 interface(REG_INTER); 4166 %} 4167 4168 // Indirect Memory Operand Long 4169 operand load_long_indirect(load_long_RegP reg) %{ 4170 constraint(ALLOC_IN_RC(esi_reg)); 4171 match(reg); 4172 4173 format %{ "[$reg]" %} 4174 interface(MEMORY_INTER) %{ 4175 base($reg); 4176 index(0x4); 4177 scale(0x0); 4178 disp(0x0); 4179 %} 4180 %} 4181 4182 // Indirect Memory Plus Long Offset Operand 4183 operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 4184 match(AddP reg off); 4185 4186 format %{ "[$reg + $off]" %} 4187 interface(MEMORY_INTER) %{ 4188 base($reg); 4189 index(0x4); 4190 scale(0x0); 4191 disp($off); 4192 %} 4193 %} 4194 4195 opclass load_long_memory(load_long_indirect, load_long_indOffset32); 4196 4197 4198 //----------Special Memory Operands-------------------------------------------- 4199 // Stack Slot Operand - This operand is used for loading and storing temporary 4200 // values on the stack where a match requires a value to 4201 // flow through memory. 
4202 operand stackSlotP(sRegP reg) %{ 4203 constraint(ALLOC_IN_RC(stack_slots)); 4204 // No match rule because this operand is only generated in matching 4205 format %{ "[$reg]" %} 4206 interface(MEMORY_INTER) %{ 4207 base(0x4); // ESP 4208 index(0x4); // No Index 4209 scale(0x0); // No Scale 4210 disp($reg); // Stack Offset 4211 %} 4212 %} 4213 4214 operand stackSlotI(sRegI reg) %{ 4215 constraint(ALLOC_IN_RC(stack_slots)); 4216 // No match rule because this operand is only generated in matching 4217 format %{ "[$reg]" %} 4218 interface(MEMORY_INTER) %{ 4219 base(0x4); // ESP 4220 index(0x4); // No Index 4221 scale(0x0); // No Scale 4222 disp($reg); // Stack Offset 4223 %} 4224 %} 4225 4226 operand stackSlotF(sRegF reg) %{ 4227 constraint(ALLOC_IN_RC(stack_slots)); 4228 // No match rule because this operand is only generated in matching 4229 format %{ "[$reg]" %} 4230 interface(MEMORY_INTER) %{ 4231 base(0x4); // ESP 4232 index(0x4); // No Index 4233 scale(0x0); // No Scale 4234 disp($reg); // Stack Offset 4235 %} 4236 %} 4237 4238 operand stackSlotD(sRegD reg) %{ 4239 constraint(ALLOC_IN_RC(stack_slots)); 4240 // No match rule because this operand is only generated in matching 4241 format %{ "[$reg]" %} 4242 interface(MEMORY_INTER) %{ 4243 base(0x4); // ESP 4244 index(0x4); // No Index 4245 scale(0x0); // No Scale 4246 disp($reg); // Stack Offset 4247 %} 4248 %} 4249 4250 operand stackSlotL(sRegL reg) %{ 4251 constraint(ALLOC_IN_RC(stack_slots)); 4252 // No match rule because this operand is only generated in matching 4253 format %{ "[$reg]" %} 4254 interface(MEMORY_INTER) %{ 4255 base(0x4); // ESP 4256 index(0x4); // No Index 4257 scale(0x0); // No Scale 4258 disp($reg); // Stack Offset 4259 %} 4260 %} 4261 4262 //----------Conditional Branch Operands---------------------------------------- 4263 // Comparison Op - This is the operation of the comparison, and is limited to 4264 // the following set of codes: 4265 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) 
4266 // 4267 // Other attributes of the comparison, such as unsignedness, are specified 4268 // by the comparison instruction that sets a condition code flags register. 4269 // That result is represented by a flags operand whose subtype is appropriate 4270 // to the unsignedness (etc.) of the comparison. 4271 // 4272 // Later, the instruction which matches both the Comparison Op (a Bool) and 4273 // the flags (produced by the Cmp) specifies the coding of the comparison op 4274 // by matching a specific subtype of Bool operand below, such as cmpOpU. 4275 4276 // Comparison Code 4277 operand cmpOp() %{ 4278 match(Bool); 4279 4280 format %{ "" %} 4281 interface(COND_INTER) %{ 4282 equal(0x4, "e"); 4283 not_equal(0x5, "ne"); 4284 less(0xC, "l"); 4285 greater_equal(0xD, "ge"); 4286 less_equal(0xE, "le"); 4287 greater(0xF, "g"); 4288 overflow(0x0, "o"); 4289 no_overflow(0x1, "no"); 4290 %} 4291 %} 4292 4293 // Comparison Code, unsigned compare. Used by FP also, with 4294 // C2 (unordered) turned into GT or LT already. The other bits 4295 // C0 and C3 are turned into Carry & Zero flags. 
4296 operand cmpOpU() %{ 4297 match(Bool); 4298 4299 format %{ "" %} 4300 interface(COND_INTER) %{ 4301 equal(0x4, "e"); 4302 not_equal(0x5, "ne"); 4303 less(0x2, "b"); 4304 greater_equal(0x3, "nb"); 4305 less_equal(0x6, "be"); 4306 greater(0x7, "nbe"); 4307 overflow(0x0, "o"); 4308 no_overflow(0x1, "no"); 4309 %} 4310 %} 4311 4312 // Floating comparisons that don't require any fixup for the unordered case 4313 operand cmpOpUCF() %{ 4314 match(Bool); 4315 predicate(n->as_Bool()->_test._test == BoolTest::lt || 4316 n->as_Bool()->_test._test == BoolTest::ge || 4317 n->as_Bool()->_test._test == BoolTest::le || 4318 n->as_Bool()->_test._test == BoolTest::gt); 4319 format %{ "" %} 4320 interface(COND_INTER) %{ 4321 equal(0x4, "e"); 4322 not_equal(0x5, "ne"); 4323 less(0x2, "b"); 4324 greater_equal(0x3, "nb"); 4325 less_equal(0x6, "be"); 4326 greater(0x7, "nbe"); 4327 overflow(0x0, "o"); 4328 no_overflow(0x1, "no"); 4329 %} 4330 %} 4331 4332 4333 // Floating comparisons that can be fixed up with extra conditional jumps 4334 operand cmpOpUCF2() %{ 4335 match(Bool); 4336 predicate(n->as_Bool()->_test._test == BoolTest::ne || 4337 n->as_Bool()->_test._test == BoolTest::eq); 4338 format %{ "" %} 4339 interface(COND_INTER) %{ 4340 equal(0x4, "e"); 4341 not_equal(0x5, "ne"); 4342 less(0x2, "b"); 4343 greater_equal(0x3, "nb"); 4344 less_equal(0x6, "be"); 4345 greater(0x7, "nbe"); 4346 overflow(0x0, "o"); 4347 no_overflow(0x1, "no"); 4348 %} 4349 %} 4350 4351 // Comparison Code for FP conditional move 4352 operand cmpOp_fcmov() %{ 4353 match(Bool); 4354 4355 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 4356 n->as_Bool()->_test._test != BoolTest::no_overflow); 4357 format %{ "" %} 4358 interface(COND_INTER) %{ 4359 equal (0x0C8); 4360 not_equal (0x1C8); 4361 less (0x0C0); 4362 greater_equal(0x1C0); 4363 less_equal (0x0D0); 4364 greater (0x1D0); 4365 overflow(0x0, "o"); // not really supported by the instruction 4366 no_overflow(0x1, "no"); // not really supported 
by the instruction 4367 %} 4368 %} 4369 4370 // Comparison Code used in long compares 4371 operand cmpOp_commute() %{ 4372 match(Bool); 4373 4374 format %{ "" %} 4375 interface(COND_INTER) %{ 4376 equal(0x4, "e"); 4377 not_equal(0x5, "ne"); 4378 less(0xF, "g"); 4379 greater_equal(0xE, "le"); 4380 less_equal(0xD, "ge"); 4381 greater(0xC, "l"); 4382 overflow(0x0, "o"); 4383 no_overflow(0x1, "no"); 4384 %} 4385 %} 4386 4387 // Comparison Code used in unsigned long compares 4388 operand cmpOpU_commute() %{ 4389 match(Bool); 4390 4391 format %{ "" %} 4392 interface(COND_INTER) %{ 4393 equal(0x4, "e"); 4394 not_equal(0x5, "ne"); 4395 less(0x7, "nbe"); 4396 greater_equal(0x6, "be"); 4397 less_equal(0x3, "nb"); 4398 greater(0x2, "b"); 4399 overflow(0x0, "o"); 4400 no_overflow(0x1, "no"); 4401 %} 4402 %} 4403 4404 //----------OPERAND CLASSES---------------------------------------------------- 4405 // Operand Classes are groups of operands that are used as to simplify 4406 // instruction definitions by not requiring the AD writer to specify separate 4407 // instructions for every form of operand when the instruction accepts 4408 // multiple operand types with the same basic encoding and format. The classic 4409 // case of this is memory operands. 4410 4411 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, 4412 indIndex, indIndexScale, indIndexScaleOffset); 4413 4414 // Long memory operations are encoded in 2 instructions and a +4 offset. 4415 // This means some kind of offset is always required and you cannot use 4416 // an oop as the offset (done when working on static globals). 4417 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset, 4418 indIndex, indIndexScale, indIndexScaleOffset); 4419 4420 4421 //----------PIPELINE----------------------------------------------------------- 4422 // Rules which define the behavior of the target architectures pipeline. 
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable length (x86 encoding)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.
4466 4467 // Integer ALU reg operation 4468 pipe_class ialu_reg(rRegI dst) %{ 4469 single_instruction; 4470 dst : S4(write); 4471 dst : S3(read); 4472 DECODE : S0; // any decoder 4473 ALU : S3; // any alu 4474 %} 4475 4476 // Long ALU reg operation 4477 pipe_class ialu_reg_long(eRegL dst) %{ 4478 instruction_count(2); 4479 dst : S4(write); 4480 dst : S3(read); 4481 DECODE : S0(2); // any 2 decoders 4482 ALU : S3(2); // both alus 4483 %} 4484 4485 // Integer ALU reg operation using big decoder 4486 pipe_class ialu_reg_fat(rRegI dst) %{ 4487 single_instruction; 4488 dst : S4(write); 4489 dst : S3(read); 4490 D0 : S0; // big decoder only 4491 ALU : S3; // any alu 4492 %} 4493 4494 // Long ALU reg operation using big decoder 4495 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4496 instruction_count(2); 4497 dst : S4(write); 4498 dst : S3(read); 4499 D0 : S0(2); // big decoder only; twice 4500 ALU : S3(2); // any 2 alus 4501 %} 4502 4503 // Integer ALU reg-reg operation 4504 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4505 single_instruction; 4506 dst : S4(write); 4507 src : S3(read); 4508 DECODE : S0; // any decoder 4509 ALU : S3; // any alu 4510 %} 4511 4512 // Long ALU reg-reg operation 4513 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4514 instruction_count(2); 4515 dst : S4(write); 4516 src : S3(read); 4517 DECODE : S0(2); // any 2 decoders 4518 ALU : S3(2); // both alus 4519 %} 4520 4521 // Integer ALU reg-reg operation 4522 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4523 single_instruction; 4524 dst : S4(write); 4525 src : S3(read); 4526 D0 : S0; // big decoder only 4527 ALU : S3; // any alu 4528 %} 4529 4530 // Long ALU reg-reg operation 4531 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4532 instruction_count(2); 4533 dst : S4(write); 4534 src : S3(read); 4535 D0 : S0(2); // big decoder only; twice 4536 ALU : S3(2); // both alus 4537 %} 4538 4539 // Integer ALU reg-mem operation 4540 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4541 single_instruction; 4542 dst : S5(write); 4543 mem : S3(read); 4544 D0 : S0; // big decoder only 4545 ALU : S4; // any alu 4546 MEM : S3; // any mem 4547 %} 4548 4549 // Long ALU reg-mem operation 4550 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4551 instruction_count(2); 4552 dst : S5(write); 4553 mem : S3(read); 4554 D0 : S0(2); // big decoder only; twice 4555 ALU : S4(2); // any 2 alus 4556 MEM : S3(2); // both mems 4557 %} 4558 4559 // Integer mem operation (prefetch) 4560 pipe_class ialu_mem(memory mem) 4561 %{ 4562 single_instruction; 4563 mem : S3(read); 4564 D0 : S0; // big decoder only 4565 MEM : S3; // any mem 4566 %} 4567 4568 // Integer Store to Memory 4569 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4570 single_instruction; 4571 mem : S3(read); 4572 src : S5(read); 4573 D0 : S0; // big decoder only 4574 ALU : S4; // any alu 4575 MEM : S3; 4576 %} 4577 4578 // Long Store to Memory 4579 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4580 instruction_count(2); 4581 mem : S3(read); 4582 src : S5(read); 4583 D0 : S0(2); // big decoder only; twice 4584 ALU : S4(2); // any 2 alus 4585 MEM : S3(2); // Both mems 4586 %} 4587 4588 // Integer Store to Memory 4589 pipe_class ialu_mem_imm(memory mem) %{ 4590 single_instruction; 4591 mem : S3(read); 4592 D0 : S0; // big decoder only 4593 ALU : S4; // any alu 4594 MEM : S3; 4595 %} 4596 4597 // Integer ALU0 reg-reg operation 4598 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4599 single_instruction; 4600 dst : S4(write); 4601 src : S3(read); 4602 D0 : S0; // Big decoder only 4603 ALU0 : S3; // only alu0 4604 %} 4605 4606 // Integer ALU0 reg-mem operation 4607 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4608 single_instruction; 4609 dst : S5(write); 4610 mem : S3(read); 4611 D0 : S0; // big decoder only 4612 ALU0 : S4; // ALU0 only 4613 MEM : S3; // any mem 4614 %} 4615 4616 // Integer ALU reg-reg operation 4617 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4618 single_instruction; 4619 cr : S4(write); 4620 src1 : S3(read); 4621 src2 : S3(read); 4622 DECODE : S0; // any decoder 4623 ALU : S3; // any alu 4624 %} 4625 4626 // Integer ALU reg-imm operation 4627 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4628 single_instruction; 4629 cr : S4(write); 4630 src1 : S3(read); 4631 DECODE : S0; // any decoder 4632 ALU : S3; // any alu 4633 %} 4634 4635 // Integer ALU reg-mem operation 4636 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4637 single_instruction; 4638 cr : S4(write); 4639 src1 : S3(read); 4640 src2 : S3(read); 4641 D0 : S0; // big decoder only 4642 ALU : S4; // any alu 4643 MEM : S3; 4644 %} 4645 4646 // Conditional move reg-reg 4647 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4648 instruction_count(4); 4649 y : S4(read); 4650 q : S3(read); 4651 p : S3(read); 4652 DECODE : S0(4); // any decoder 4653 %} 4654 4655 // Conditional move reg-reg 4656 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4657 single_instruction; 4658 dst : S4(write); 4659 src : S3(read); 4660 cr : S3(read); 4661 DECODE : S0; // any decoder 4662 %} 4663 4664 // Conditional move reg-mem 4665 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4666 single_instruction; 4667 dst : S4(write); 4668 src : S3(read); 4669 cr : S3(read); 4670 DECODE : S0; // any decoder 4671 MEM : S3; 4672 %} 4673 4674 // Conditional move reg-reg long 4675 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4676 single_instruction; 4677 dst : S4(write); 4678 src : S3(read); 4679 cr : S3(read); 4680 DECODE : S0(2); // any 2 decoders 4681 %} 4682 4683 // Conditional move double reg-reg 4684 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4685 single_instruction; 4686 dst : S4(write); 4687 src : S3(read); 4688 cr : S3(read); 4689 DECODE : S0; // any decoder 4690 %} 4691 4692 // Float reg-reg operation 4693 pipe_class fpu_reg(regDPR 
dst) %{
  instruction_count(2);
  dst : S3(read);
  DECODE : S0(2); // any 2 decoders
  FPU : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
  instruction_count(2);
  dst : S4(write);
  src : S3(read);
  DECODE : S0(2); // any 2 decoders
  FPU : S3;
%}

// Float reg-reg operation, two sources
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
  instruction_count(3);
  dst : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  DECODE : S0(3); // any 3 decoders
  FPU : S3(2);
%}

// Float reg-reg operation, three sources
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  src3 : S3(read);
  DECODE : S0(4); // any 4 decoders
  FPU : S3(2);
%}

// Float reg-reg operation with one memory source
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  src3 : S3(read);
  DECODE : S1(3); // any 3 decoders
  D0 : S0;        // Big decoder only
  FPU : S3(2);
  MEM : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
  instruction_count(2);
  dst : S5(write);
  mem : S3(read);
  D0 : S0;     // big decoder only
  DECODE : S1; // any decoder for FPU POP
  FPU : S4;
  MEM : S3;    // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
  instruction_count(3);
  dst : S5(write);
  src1 : S3(read);
  mem : S3(read);
  D0 : S0;        // big decoder only
  DECODE : S1(2); // any decoder for FPU POP
  FPU : S4;
  MEM : S3;       // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
  instruction_count(2);
  src    : S5(read);
  mem    : S3(read);
  DECODE : S0;        // any decoder for FPU PUSH
  D0     : S1;        // big decoder only
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// Float store to memory with two FPU register inputs.
pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
  instruction_count(3);
  src1   : S3(read);
  src2   : S3(read);
  mem    : S3(read);
  DECODE : S0(2);     // any decoder for FPU PUSH
  D0     : S1;        // big decoder only
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// Float store to memory with one FPU register input and one memory input.
pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
  instruction_count(3);
  src1   : S3(read);
  src2   : S3(read);
  mem    : S4(read);
  DECODE : S0;        // any decoder for FPU PUSH
  D0     : S0(2);     // big decoder only
  FPU    : S4;
  MEM    : S3(2);     // any mem
%}

// Float memory-to-memory move.
pipe_class fpu_mem_mem(memory dst, memory src1) %{
  instruction_count(2);
  src1   : S3(read);
  dst    : S4(read);
  D0     : S0(2);     // big decoder only
  MEM    : S3(2);     // any mem
%}

// Float memory op with two memory inputs and a memory result.
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
  instruction_count(3);
  src1   : S3(read);
  src2   : S3(read);
  dst    : S4(read);
  D0     : S0(3);     // big decoder only
  FPU    : S4;
  MEM    : S3(3);     // any mem
%}

// Float op on a register and a constant, result to memory.
pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
  instruction_count(3);
  src1   : S4(read);
  mem    : S4(read);
  DECODE : S0;        // any decoder for FPU PUSH
  D0     : S0(2);     // big decoder only
  FPU    : S4;
  MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
  instruction_count(2);
  dst    : S5(write);
  D0     : S0;        // big decoder only for the load
  DECODE : S1;        // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
  instruction_count(3);
  dst    : S5(write);
  src    : S3(read);
  D0     : S0;        // big decoder only for the load
  DECODE : S1(2);     // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
  single_instruction;
  BR   : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
  single_instruction;
  cr   : S1(read);
  BR   : S3;
%}

// Allocation idiom
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
  instruction_count(1); force_serialization;
  fixed_latency(6);
  heap_ptr : S3(read);
  DECODE   : S0(3);
  D0       : S2;
  MEM      : S3;
  ALU      : S3(2);
  dst      : S5(write);
  BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
  instruction_count(10); multiple_bundles; force_serialization;
  fixed_latency(100);
  D0  : S0(2);
  MEM : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
  instruction_count(0);
%}

// Define the class for the Nop node
define %{
  MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided.  These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters.  The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// These must never reach code emission, hence the ShouldNotReachHere() bodies.
// Load Float
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}



// Load Double
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

//----------BSWAP-Instruction--------------------------------------------------
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP  $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  // Swap each 32-bit half, then exchange the halves of the register pair.
  format %{ "BSWAP  $dst.lo\n\t"
            "BSWAP  $dst.hi\n\t"
            "XCHG   $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  // Logical shift: the reversed 16-bit value ends up zero-extended.
  format %{ "BSWAP  $dst\n\t"
            "SHR    $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  // Arithmetic shift: the reversed 16-bit value ends up sign-extended.
  format %{ "BSWAP  $dst\n\t"
            "SAR    $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback for CPUs without LZCNT: BSR returns the index of the highest set
// bit (undefined result but ZF set for input zero), so 31 - index is the
// leading-zero count; input zero is special-cased to yield 32.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ    skip\n\t"
            "MOV    $dst, -1\n"
      "skip:\n\t"
            "NEG    $dst\n\t"
            "ADD    $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);      // input was zero: -1 makes -(-1)+31 == 32
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  // LZCNT sets CF when its input is zero; only then do the low 32 bits count.
  format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC    done\n\t"
            "LZCNT  $dst, $src.lo\n\t"
            "ADD    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ     msw_is_zero\n\t"
            "ADD    $dst, 32\n\t"
            "JMP    not_zero\n"
      "msw_is_zero:\n\t"
            "BSR    $dst, $src.lo\n\t"
            "JNZ    not_zero\n\t"
            "MOV    $dst, -1\n"
      "not_zero:\n\t"
            "NEG    $dst\n\t"
            "ADD    $dst, 63\n" %}
 ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);      // whole long was zero: -1 makes -(-1)+63 == 64
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback for CPUs without TZCNT: BSF finds the lowest set bit (ZF set when
// the input is zero, in which case the count is defined to be 32).
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ    done\n\t"
            "MOV    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  // TZCNT sets CF when its input is zero; only then count the high word too.
  format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC    done\n\t"
            "TZCNT  $dst, $src.hi\n\t"
            "ADD    $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ    done\n\t"
            "BSF    $dst, $src.hi\n\t"
            "JNZ    msw_not_zero\n\t"
            "MOV    $dst, 32\n"
      "msw_not_zero:\n\t"
            "ADD    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  // Count each 32-bit half separately, then sum.
  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    // Build the low/high-word addresses by hand (disp and disp+4).
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 mask bits can matter after the zero-extending byte load.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX  $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  // Masking with 0xFF lets a single zero-extending byte load do all the work.
  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 mask bits can matter after the zero-extending load.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  // Mask 0xFF: a single zero-extending byte load replaces load+and.
  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  // Mask 0xFFFF: a single zero-extending word load replaces load+and.
  format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  // A 31-bit mask clears the sign bit, so the high word is simply zero.
  format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
            "MOV    $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads (non-atomic): low word at disp, high word at disp+4.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
            "FISTp  $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load via a 64-bit XMM move, result to a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD  $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load via XMM, result split into a GPR pair.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD   $dst.lo,$tmp\n\t"
            "PSRLQ  $tmp,32\n\t"
            "MOVD   $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Double
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D  ST,$mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD  $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS  $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S  ST,$mem\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV    $dst,$src" %}
  ins_encode( SetInstMark, LdImmI(dst, src), ClearInstMark );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// XOR is shorter than MOV imm32 and the value is the same; flags are killed.
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR    $dst,$dst" %}
  opcode(0x33);  /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV    $dst,$src" %}
  opcode(0xB8);  /* + rd */
  ins_encode( SetInstMark, LdImmP(dst, src), ClearInstMark );
  ins_pipe( ialu_reg_fat );
%}

instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV    $dst.lo,$src.lo\n\t"
            "MOV    $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR    $dst.lo,$dst.lo\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
5937 instruct loadConFPR1(regFPR dst, immFPR1 con) %{ 5938 match(Set dst con); 5939 ins_cost(125); 5940 format %{ "FLD1 ST\n\t" 5941 "FSTP $dst" %} 5942 ins_encode %{ 5943 __ fld1(); 5944 __ fstp_d($dst$$reg); 5945 %} 5946 ins_pipe(fpu_reg_con); 5947 %} 5948 5949 // The instruction usage is guarded by predicate in operand immF(). 5950 instruct loadConF(regF dst, immF con) %{ 5951 match(Set dst con); 5952 ins_cost(125); 5953 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 5954 ins_encode %{ 5955 __ movflt($dst$$XMMRegister, $constantaddress($con)); 5956 %} 5957 ins_pipe(pipe_slow); 5958 %} 5959 5960 // The instruction usage is guarded by predicate in operand immF0(). 5961 instruct loadConF0(regF dst, immF0 src) %{ 5962 match(Set dst src); 5963 ins_cost(100); 5964 format %{ "XORPS $dst,$dst\t# float 0.0" %} 5965 ins_encode %{ 5966 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 5967 %} 5968 ins_pipe(pipe_slow); 5969 %} 5970 5971 // The instruction usage is guarded by predicate in operand immDPR(). 5972 instruct loadConDPR(regDPR dst, immDPR con) %{ 5973 match(Set dst con); 5974 ins_cost(125); 5975 5976 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 5977 "FSTP $dst" %} 5978 ins_encode %{ 5979 __ fld_d($constantaddress($con)); 5980 __ fstp_d($dst$$reg); 5981 %} 5982 ins_pipe(fpu_reg_con); 5983 %} 5984 5985 // The instruction usage is guarded by predicate in operand immDPR0(). 5986 instruct loadConDPR0(regDPR dst, immDPR0 con) %{ 5987 match(Set dst con); 5988 ins_cost(125); 5989 5990 format %{ "FLDZ ST\n\t" 5991 "FSTP $dst" %} 5992 ins_encode %{ 5993 __ fldz(); 5994 __ fstp_d($dst$$reg); 5995 %} 5996 ins_pipe(fpu_reg_con); 5997 %} 5998 5999 // The instruction usage is guarded by predicate in operand immDPR1(). 
// Double 1.0 via FLD1 (guarded by the immDPR1 operand predicate above).
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// SSE2 variant: MOVSD the double from the constant table into an XMM reg.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
// XMM zeroing idiom: XORPD reg with itself.
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Long from a stack slot: two 32-bit loads, lo then hi half.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
// Float from a stack slot via the FPU stack (push with FLD, pop into $dst).
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
// Double from a stack slot via the FPU stack.
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// Exactly one of the variants below matches, selected by the UseSSE level
// and the AllocatePrefetchInstr flag in the predicates.

instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// 0x66 operand-size prefix (OpcS emitted first) turns MOV r/m32 into a
// 16-bit store.
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( SetInstMark, OpcS, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
// Non-atomic path: two 32-bit stores.  Atomic (volatile) stores are
// handled by the storeL_volatile / storeLX* variants below.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// Only the low half of the long is stored (the ConvL2I truncation).
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( SetInstMark, OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: bounce the long through an XMM register; a single MOVSD
// store is 64-bit atomic.  Source is a stack slot.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant with the long in a GPR pair: pack lo/hi halves into one XMM
// register with PUNPCKLDQ, then do the atomic 64-bit MOVSD store.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SetInstMark, SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32( src ), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Double
// x87 path (UseSSE<=1): source must already be on the FPU stack (regDPR1).
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
// Same encoding as storeDPR; also matches an explicit RoundDouble because
// the memory store itself performs the rounding.
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// Store Float
// x87 path (UseSSE==0): source must already be on the FPU stack (regFPR1).
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// The 32-bit memory store performs the ConvD2F narrowing for free.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
// The float bits are stored as a plain 32-bit integer immediate.
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
// Two 32-bit stores, lo half then hi half.
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors
// Most flavors are empty encodings (size(0)): on this architecture the
// ordering they require is already provided by surrounding instructions;
// only membar_volatile emits real code (a locked add / StoreLoad fence).

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elide the fence when a following store already provides the needed
// StoreLoad ordering (see Matcher::post_store_load_barrier).
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// CastX2P is free: source and destination are constrained to the same
// register (eAXRegI/eAXRegP), so no code is emitted.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
// jmovI_* emulate CMOV with a short conditional jump around a MOV, for
// CPUs without CMOV support (see the !supports_cmov() predicates).
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Real CMOV (0F 40+cc) for CPUs that support it.
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// UCF (unordered-compare flags) variant simply delegates to the unsigned
// form via an ADLC expand.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
// x87 FCMOV (opcode 0xDA) for doubles; destination must be FPU top of
// stack (regDPR1).
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed-compare case: emulate FCMOV with a conditional branch around a
// push/store sequence (see the note above about signed compares).
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
// XMM registers have no conditional-move instruction, so branch around a
// MOVSS/MOVSD instead.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move: one CMOV per 32-bit register half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of +1 uses the one-byte INC (0x40 + register number) when the
// UseIncDec flag allows it.
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: no flags clobbered, hence no eFlagsReg effect.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
  ins_pipe( ialu_reg_reg );
%}

// Add of -1 uses the one-byte DEC (0x48 + register number).
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
7006 %} 7007 7008 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7009 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7010 effect(KILL cr); 7011 7012 ins_cost(150); 7013 format %{ "ADD $dst,$src" %} 7014 opcode(0x01); /* Opcode 01 /r */ 7015 ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); 7016 ins_pipe( ialu_mem_reg ); 7017 %} 7018 7019 // Add Memory with Immediate 7020 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7021 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7022 effect(KILL cr); 7023 7024 ins_cost(125); 7025 format %{ "ADD $dst,$src" %} 7026 opcode(0x81); /* Opcode 81 /0 id */ 7027 ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32(src), ClearInstMark ); 7028 ins_pipe( ialu_mem_imm ); 7029 %} 7030 7031 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{ 7032 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7033 effect(KILL cr); 7034 7035 ins_cost(125); 7036 format %{ "INC $dst" %} 7037 opcode(0xFF); /* Opcode FF /0 */ 7038 ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,dst), ClearInstMark); 7039 ins_pipe( ialu_mem_imm ); 7040 %} 7041 7042 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7043 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7044 effect(KILL cr); 7045 7046 ins_cost(125); 7047 format %{ "DEC $dst" %} 7048 opcode(0xFF); /* Opcode FF /1 */ 7049 ins_encode( SetInstMark, OpcP, RMopc_Mem(0x01,dst), ClearInstMark); 7050 ins_pipe( ialu_mem_imm ); 7051 %} 7052 7053 7054 instruct checkCastPP( eRegP dst ) %{ 7055 match(Set dst (CheckCastPP dst)); 7056 7057 size(0); 7058 format %{ "#checkcastPP of $dst" %} 7059 ins_encode( /*empty encoding*/ ); 7060 ins_pipe( empty ); 7061 %} 7062 7063 instruct castPP( eRegP dst ) %{ 7064 match(Set dst (CastPP dst)); 7065 format %{ "#castPP of $dst" %} 7066 ins_encode( /*empty encoding*/ ); 7067 ins_pipe( empty ); 7068 %} 7069 7070 instruct castII( rRegI dst ) %{ 7071 match(Set dst (CastII dst)); 7072 format %{ "#castII 
of $dst" %} 7073 ins_encode( /*empty encoding*/ ); 7074 ins_cost(0); 7075 ins_pipe( empty ); 7076 %} 7077 7078 instruct castLL( eRegL dst ) %{ 7079 match(Set dst (CastLL dst)); 7080 format %{ "#castLL of $dst" %} 7081 ins_encode( /*empty encoding*/ ); 7082 ins_cost(0); 7083 ins_pipe( empty ); 7084 %} 7085 7086 instruct castFF( regF dst ) %{ 7087 predicate(UseSSE >= 1); 7088 match(Set dst (CastFF dst)); 7089 format %{ "#castFF of $dst" %} 7090 ins_encode( /*empty encoding*/ ); 7091 ins_cost(0); 7092 ins_pipe( empty ); 7093 %} 7094 7095 instruct castDD( regD dst ) %{ 7096 predicate(UseSSE >= 2); 7097 match(Set dst (CastDD dst)); 7098 format %{ "#castDD of $dst" %} 7099 ins_encode( /*empty encoding*/ ); 7100 ins_cost(0); 7101 ins_pipe( empty ); 7102 %} 7103 7104 instruct castFF_PR( regFPR dst ) %{ 7105 predicate(UseSSE < 1); 7106 match(Set dst (CastFF dst)); 7107 format %{ "#castFF of $dst" %} 7108 ins_encode( /*empty encoding*/ ); 7109 ins_cost(0); 7110 ins_pipe( empty ); 7111 %} 7112 7113 instruct castDD_PR( regDPR dst ) %{ 7114 predicate(UseSSE < 2); 7115 match(Set dst (CastDD dst)); 7116 format %{ "#castDD of $dst" %} 7117 ins_encode( /*empty encoding*/ ); 7118 ins_cost(0); 7119 ins_pipe( empty ); 7120 %} 7121 7122 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7123 7124 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7125 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7126 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval))); 7127 effect(KILL cr, KILL oldval); 7128 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7129 "MOV $res,0\n\t" 7130 "JNE,s fail\n\t" 7131 "MOV $res,1\n" 7132 "fail:" %} 7133 ins_encode( enc_cmpxchg8(mem_ptr), 7134 enc_flags_ne_to_boolean(res) ); 7135 ins_pipe( pipe_cmpxchg ); 7136 %} 7137 7138 instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, 
eCXRegP newval, eFlagsReg cr) %{ 7139 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 7140 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); 7141 effect(KILL cr, KILL oldval); 7142 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7143 "MOV $res,0\n\t" 7144 "JNE,s fail\n\t" 7145 "MOV $res,1\n" 7146 "fail:" %} 7147 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7148 ins_pipe( pipe_cmpxchg ); 7149 %} 7150 7151 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7152 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval))); 7153 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval))); 7154 effect(KILL cr, KILL oldval); 7155 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7156 "MOV $res,0\n\t" 7157 "JNE,s fail\n\t" 7158 "MOV $res,1\n" 7159 "fail:" %} 7160 ins_encode( enc_cmpxchgb(mem_ptr), 7161 enc_flags_ne_to_boolean(res) ); 7162 ins_pipe( pipe_cmpxchg ); 7163 %} 7164 7165 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7166 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval))); 7167 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval))); 7168 effect(KILL cr, KILL oldval); 7169 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7170 "MOV $res,0\n\t" 7171 "JNE,s fail\n\t" 7172 "MOV $res,1\n" 7173 "fail:" %} 7174 ins_encode( enc_cmpxchgw(mem_ptr), 7175 enc_flags_ne_to_boolean(res) ); 7176 ins_pipe( pipe_cmpxchg ); 7177 %} 7178 7179 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7180 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 7181 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval))); 7182 effect(KILL cr, KILL oldval); 7183 format %{ 
"CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7184 "MOV $res,0\n\t" 7185 "JNE,s fail\n\t" 7186 "MOV $res,1\n" 7187 "fail:" %} 7188 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7189 ins_pipe( pipe_cmpxchg ); 7190 %} 7191 7192 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7193 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval))); 7194 effect(KILL cr); 7195 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7196 ins_encode( enc_cmpxchg8(mem_ptr) ); 7197 ins_pipe( pipe_cmpxchg ); 7198 %} 7199 7200 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7201 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); 7202 effect(KILL cr); 7203 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7204 ins_encode( enc_cmpxchg(mem_ptr) ); 7205 ins_pipe( pipe_cmpxchg ); 7206 %} 7207 7208 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7209 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval))); 7210 effect(KILL cr); 7211 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7212 ins_encode( enc_cmpxchgb(mem_ptr) ); 7213 ins_pipe( pipe_cmpxchg ); 7214 %} 7215 7216 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7217 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval))); 7218 effect(KILL cr); 7219 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7220 ins_encode( enc_cmpxchgw(mem_ptr) ); 7221 ins_pipe( pipe_cmpxchg ); 7222 %} 7223 7224 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7225 match(Set oldval 
(CompareAndExchangeI mem_ptr (Binary oldval newval))); 7226 effect(KILL cr); 7227 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7228 ins_encode( enc_cmpxchg(mem_ptr) ); 7229 ins_pipe( pipe_cmpxchg ); 7230 %} 7231 7232 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7233 predicate(n->as_LoadStore()->result_not_used()); 7234 match(Set dummy (GetAndAddB mem add)); 7235 effect(KILL cr); 7236 format %{ "ADDB [$mem],$add" %} 7237 ins_encode %{ 7238 __ lock(); 7239 __ addb($mem$$Address, $add$$constant); 7240 %} 7241 ins_pipe( pipe_cmpxchg ); 7242 %} 7243 7244 // Important to match to xRegI: only 8-bit regs. 7245 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{ 7246 match(Set newval (GetAndAddB mem newval)); 7247 effect(KILL cr); 7248 format %{ "XADDB [$mem],$newval" %} 7249 ins_encode %{ 7250 __ lock(); 7251 __ xaddb($mem$$Address, $newval$$Register); 7252 %} 7253 ins_pipe( pipe_cmpxchg ); 7254 %} 7255 7256 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7257 predicate(n->as_LoadStore()->result_not_used()); 7258 match(Set dummy (GetAndAddS mem add)); 7259 effect(KILL cr); 7260 format %{ "ADDS [$mem],$add" %} 7261 ins_encode %{ 7262 __ lock(); 7263 __ addw($mem$$Address, $add$$constant); 7264 %} 7265 ins_pipe( pipe_cmpxchg ); 7266 %} 7267 7268 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{ 7269 match(Set newval (GetAndAddS mem newval)); 7270 effect(KILL cr); 7271 format %{ "XADDS [$mem],$newval" %} 7272 ins_encode %{ 7273 __ lock(); 7274 __ xaddw($mem$$Address, $newval$$Register); 7275 %} 7276 ins_pipe( pipe_cmpxchg ); 7277 %} 7278 7279 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7280 predicate(n->as_LoadStore()->result_not_used()); 7281 match(Set dummy (GetAndAddI mem add)); 7282 effect(KILL cr); 7283 format %{ "ADDL [$mem],$add" %} 7284 ins_encode %{ 7285 __ lock(); 7286 __ addl($mem$$Address, 
$add$$constant); 7287 %} 7288 ins_pipe( pipe_cmpxchg ); 7289 %} 7290 7291 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ 7292 match(Set newval (GetAndAddI mem newval)); 7293 effect(KILL cr); 7294 format %{ "XADDL [$mem],$newval" %} 7295 ins_encode %{ 7296 __ lock(); 7297 __ xaddl($mem$$Address, $newval$$Register); 7298 %} 7299 ins_pipe( pipe_cmpxchg ); 7300 %} 7301 7302 // Important to match to xRegI: only 8-bit regs. 7303 instruct xchgB( memory mem, xRegI newval) %{ 7304 match(Set newval (GetAndSetB mem newval)); 7305 format %{ "XCHGB $newval,[$mem]" %} 7306 ins_encode %{ 7307 __ xchgb($newval$$Register, $mem$$Address); 7308 %} 7309 ins_pipe( pipe_cmpxchg ); 7310 %} 7311 7312 instruct xchgS( memory mem, rRegI newval) %{ 7313 match(Set newval (GetAndSetS mem newval)); 7314 format %{ "XCHGW $newval,[$mem]" %} 7315 ins_encode %{ 7316 __ xchgw($newval$$Register, $mem$$Address); 7317 %} 7318 ins_pipe( pipe_cmpxchg ); 7319 %} 7320 7321 instruct xchgI( memory mem, rRegI newval) %{ 7322 match(Set newval (GetAndSetI mem newval)); 7323 format %{ "XCHGL $newval,[$mem]" %} 7324 ins_encode %{ 7325 __ xchgl($newval$$Register, $mem$$Address); 7326 %} 7327 ins_pipe( pipe_cmpxchg ); 7328 %} 7329 7330 instruct xchgP( memory mem, pRegP newval) %{ 7331 match(Set newval (GetAndSetP mem newval)); 7332 format %{ "XCHGL $newval,[$mem]" %} 7333 ins_encode %{ 7334 __ xchgl($newval$$Register, $mem$$Address); 7335 %} 7336 ins_pipe( pipe_cmpxchg ); 7337 %} 7338 7339 //----------Subtraction Instructions------------------------------------------- 7340 7341 // Integer Subtraction Instructions 7342 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7343 match(Set dst (SubI dst src)); 7344 effect(KILL cr); 7345 7346 size(2); 7347 format %{ "SUB $dst,$src" %} 7348 opcode(0x2B); 7349 ins_encode( OpcP, RegReg( dst, src) ); 7350 ins_pipe( ialu_reg_reg ); 7351 %} 7352 7353 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7354 match(Set dst (SubI dst src)); 7355 
effect(KILL cr); 7356 7357 format %{ "SUB $dst,$src" %} 7358 opcode(0x81,0x05); /* Opcode 81 /5 */ 7359 // ins_encode( RegImm( dst, src) ); 7360 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7361 ins_pipe( ialu_reg ); 7362 %} 7363 7364 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7365 match(Set dst (SubI dst (LoadI src))); 7366 effect(KILL cr); 7367 7368 ins_cost(150); 7369 format %{ "SUB $dst,$src" %} 7370 opcode(0x2B); 7371 ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark ); 7372 ins_pipe( ialu_reg_mem ); 7373 %} 7374 7375 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7376 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 7377 effect(KILL cr); 7378 7379 ins_cost(150); 7380 format %{ "SUB $dst,$src" %} 7381 opcode(0x29); /* Opcode 29 /r */ 7382 ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); 7383 ins_pipe( ialu_mem_reg ); 7384 %} 7385 7386 // Subtract from a pointer 7387 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{ 7388 match(Set dst (AddP dst (SubI zero src))); 7389 effect(KILL cr); 7390 7391 size(2); 7392 format %{ "SUB $dst,$src" %} 7393 opcode(0x2B); 7394 ins_encode( OpcP, RegReg( dst, src) ); 7395 ins_pipe( ialu_reg_reg ); 7396 %} 7397 7398 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{ 7399 match(Set dst (SubI zero dst)); 7400 effect(KILL cr); 7401 7402 size(2); 7403 format %{ "NEG $dst" %} 7404 opcode(0xF7,0x03); // Opcode F7 /3 7405 ins_encode( OpcP, RegOpc( dst ) ); 7406 ins_pipe( ialu_reg ); 7407 %} 7408 7409 //----------Multiplication/Division Instructions------------------------------- 7410 // Integer Multiplication Instructions 7411 // Multiply Register 7412 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7413 match(Set dst (MulI dst src)); 7414 effect(KILL cr); 7415 7416 size(3); 7417 ins_cost(300); 7418 format %{ "IMUL $dst,$src" %} 7419 opcode(0xAF, 0x0F); 7420 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 7421 ins_pipe( 
ialu_reg_reg_alu0 ); 7422 %} 7423 7424 // Multiply 32-bit Immediate 7425 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ 7426 match(Set dst (MulI src imm)); 7427 effect(KILL cr); 7428 7429 ins_cost(300); 7430 format %{ "IMUL $dst,$src,$imm" %} 7431 opcode(0x69); /* 69 /r id */ 7432 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 7433 ins_pipe( ialu_reg_reg_alu0 ); 7434 %} 7435 7436 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 7437 match(Set dst src); 7438 effect(KILL cr); 7439 7440 // Note that this is artificially increased to make it more expensive than loadConL 7441 ins_cost(250); 7442 format %{ "MOV EAX,$src\t// low word only" %} 7443 opcode(0xB8); 7444 ins_encode( LdImmL_Lo(dst, src) ); 7445 ins_pipe( ialu_reg_fat ); 7446 %} 7447 7448 // Multiply by 32-bit Immediate, taking the shifted high order results 7449 // (special case for shift by 32) 7450 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 7451 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7452 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7453 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7454 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7455 effect(USE src1, KILL cr); 7456 7457 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7458 ins_cost(0*100 + 1*400 - 150); 7459 format %{ "IMUL EDX:EAX,$src1" %} 7460 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7461 ins_pipe( pipe_slow ); 7462 %} 7463 7464 // Multiply by 32-bit Immediate, taking the shifted high order results 7465 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 7466 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7467 predicate( 
_kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7468 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7469 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7470 effect(USE src1, KILL cr); 7471 7472 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7473 ins_cost(1*100 + 1*400 - 150); 7474 format %{ "IMUL EDX:EAX,$src1\n\t" 7475 "SAR EDX,$cnt-32" %} 7476 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7477 ins_pipe( pipe_slow ); 7478 %} 7479 7480 // Multiply Memory 32-bit Immediate 7481 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ 7482 match(Set dst (MulI (LoadI src) imm)); 7483 effect(KILL cr); 7484 7485 ins_cost(300); 7486 format %{ "IMUL $dst,$src,$imm" %} 7487 opcode(0x69); /* 69 /r id */ 7488 ins_encode( SetInstMark, OpcSE(imm), RegMem( dst, src ), Con8or32( imm ), ClearInstMark ); 7489 ins_pipe( ialu_reg_mem_alu0 ); 7490 %} 7491 7492 // Multiply Memory 7493 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ 7494 match(Set dst (MulI dst (LoadI src))); 7495 effect(KILL cr); 7496 7497 ins_cost(350); 7498 format %{ "IMUL $dst,$src" %} 7499 opcode(0xAF, 0x0F); 7500 ins_encode( SetInstMark, OpcS, OpcP, RegMem( dst, src), ClearInstMark ); 7501 ins_pipe( ialu_reg_mem_alu0 ); 7502 %} 7503 7504 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr) 7505 %{ 7506 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3))); 7507 effect(KILL cr, KILL src2); 7508 7509 expand %{ mulI_eReg(dst, src1, cr); 7510 mulI_eReg(src2, src3, cr); 7511 addI_eReg(dst, src2, cr); %} 7512 %} 7513 7514 // Multiply Register Int to Long 7515 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 7516 // Basic Idea: long = (long)int * (long)int 7517 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 7518 effect(DEF dst, USE src, USE src1, KILL 
flags); 7519 7520 ins_cost(300); 7521 format %{ "IMUL $dst,$src1" %} 7522 7523 ins_encode( long_int_multiply( dst, src1 ) ); 7524 ins_pipe( ialu_reg_reg_alu0 ); 7525 %} 7526 7527 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 7528 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 7529 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 7530 effect(KILL flags); 7531 7532 ins_cost(300); 7533 format %{ "MUL $dst,$src1" %} 7534 7535 ins_encode( long_uint_multiply(dst, src1) ); 7536 ins_pipe( ialu_reg_reg_alu0 ); 7537 %} 7538 7539 // Multiply Register Long 7540 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7541 match(Set dst (MulL dst src)); 7542 effect(KILL cr, TEMP tmp); 7543 ins_cost(4*100+3*400); 7544 // Basic idea: lo(result) = lo(x_lo * y_lo) 7545 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 7546 format %{ "MOV $tmp,$src.lo\n\t" 7547 "IMUL $tmp,EDX\n\t" 7548 "MOV EDX,$src.hi\n\t" 7549 "IMUL EDX,EAX\n\t" 7550 "ADD $tmp,EDX\n\t" 7551 "MUL EDX:EAX,$src.lo\n\t" 7552 "ADD EDX,$tmp" %} 7553 ins_encode( long_multiply( dst, src, tmp ) ); 7554 ins_pipe( pipe_slow ); 7555 %} 7556 7557 // Multiply Register Long where the left operand's high 32 bits are zero 7558 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7559 predicate(is_operand_hi32_zero(n->in(1))); 7560 match(Set dst (MulL dst src)); 7561 effect(KILL cr, TEMP tmp); 7562 ins_cost(2*100+2*400); 7563 // Basic idea: lo(result) = lo(x_lo * y_lo) 7564 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 7565 format %{ "MOV $tmp,$src.hi\n\t" 7566 "IMUL $tmp,EAX\n\t" 7567 "MUL EDX:EAX,$src.lo\n\t" 7568 "ADD EDX,$tmp" %} 7569 ins_encode %{ 7570 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 7571 __ imull($tmp$$Register, rax); 7572 __ mull($src$$Register); 7573 __ addl(rdx, $tmp$$Register); 7574 %} 7575 ins_pipe( 
pipe_slow ); 7576 %} 7577 7578 // Multiply Register Long where the right operand's high 32 bits are zero 7579 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7580 predicate(is_operand_hi32_zero(n->in(2))); 7581 match(Set dst (MulL dst src)); 7582 effect(KILL cr, TEMP tmp); 7583 ins_cost(2*100+2*400); 7584 // Basic idea: lo(result) = lo(x_lo * y_lo) 7585 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 7586 format %{ "MOV $tmp,$src.lo\n\t" 7587 "IMUL $tmp,EDX\n\t" 7588 "MUL EDX:EAX,$src.lo\n\t" 7589 "ADD EDX,$tmp" %} 7590 ins_encode %{ 7591 __ movl($tmp$$Register, $src$$Register); 7592 __ imull($tmp$$Register, rdx); 7593 __ mull($src$$Register); 7594 __ addl(rdx, $tmp$$Register); 7595 %} 7596 ins_pipe( pipe_slow ); 7597 %} 7598 7599 // Multiply Register Long where the left and the right operands' high 32 bits are zero 7600 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ 7601 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); 7602 match(Set dst (MulL dst src)); 7603 effect(KILL cr); 7604 ins_cost(1*400); 7605 // Basic idea: lo(result) = lo(x_lo * y_lo) 7606 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 7607 format %{ "MUL EDX:EAX,$src.lo\n\t" %} 7608 ins_encode %{ 7609 __ mull($src$$Register); 7610 %} 7611 ins_pipe( pipe_slow ); 7612 %} 7613 7614 // Multiply Register Long by small constant 7615 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ 7616 match(Set dst (MulL dst src)); 7617 effect(KILL cr, TEMP tmp); 7618 ins_cost(2*100+2*400); 7619 size(12); 7620 // Basic idea: lo(result) = lo(src * EAX) 7621 // hi(result) = hi(src * EAX) + lo(src * EDX) 7622 format %{ "IMUL $tmp,EDX,$src\n\t" 7623 "MOV EDX,$src\n\t" 7624 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 7625 "ADD EDX,$tmp" %} 7626 ins_encode( long_multiply_con( dst, src, tmp ) ); 7627 ins_pipe( pipe_slow ); 7628 
%} 7629 7630 // Integer DIV with Register 7631 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7632 match(Set rax (DivI rax div)); 7633 effect(KILL rdx, KILL cr); 7634 size(26); 7635 ins_cost(30*100+10*100); 7636 format %{ "CMP EAX,0x80000000\n\t" 7637 "JNE,s normal\n\t" 7638 "XOR EDX,EDX\n\t" 7639 "CMP ECX,-1\n\t" 7640 "JE,s done\n" 7641 "normal: CDQ\n\t" 7642 "IDIV $div\n\t" 7643 "done:" %} 7644 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7645 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7646 ins_pipe( ialu_reg_reg_alu0 ); 7647 %} 7648 7649 // Divide Register Long 7650 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ 7651 match(Set dst (DivL src1 src2)); 7652 effect(CALL); 7653 ins_cost(10000); 7654 format %{ "PUSH $src1.hi\n\t" 7655 "PUSH $src1.lo\n\t" 7656 "PUSH $src2.hi\n\t" 7657 "PUSH $src2.lo\n\t" 7658 "CALL SharedRuntime::ldiv\n\t" 7659 "ADD ESP,16" %} 7660 ins_encode( long_div(src1,src2) ); 7661 ins_pipe( pipe_slow ); 7662 %} 7663 7664 // Integer DIVMOD with Register, both quotient and mod results 7665 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7666 match(DivModI rax div); 7667 effect(KILL cr); 7668 size(26); 7669 ins_cost(30*100+10*100); 7670 format %{ "CMP EAX,0x80000000\n\t" 7671 "JNE,s normal\n\t" 7672 "XOR EDX,EDX\n\t" 7673 "CMP ECX,-1\n\t" 7674 "JE,s done\n" 7675 "normal: CDQ\n\t" 7676 "IDIV $div\n\t" 7677 "done:" %} 7678 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7679 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7680 ins_pipe( pipe_slow ); 7681 %} 7682 7683 // Integer MOD with Register 7684 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ 7685 match(Set rdx (ModI rax div)); 7686 effect(KILL rax, KILL cr); 7687 7688 size(26); 7689 ins_cost(300); 7690 format %{ "CDQ\n\t" 7691 "IDIV $div" %} 7692 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7693 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7694 ins_pipe( ialu_reg_reg_alu0 ); 7695 %} 7696 7697 // Remainder Register Long 7698 
instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ 7699 match(Set dst (ModL src1 src2)); 7700 effect(CALL); 7701 ins_cost(10000); 7702 format %{ "PUSH $src1.hi\n\t" 7703 "PUSH $src1.lo\n\t" 7704 "PUSH $src2.hi\n\t" 7705 "PUSH $src2.lo\n\t" 7706 "CALL SharedRuntime::lrem\n\t" 7707 "ADD ESP,16" %} 7708 ins_encode( long_mod(src1,src2) ); 7709 ins_pipe( pipe_slow ); 7710 %} 7711 7712 // Divide Register Long (no special case since divisor != -1) 7713 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7714 match(Set dst (DivL dst imm)); 7715 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7716 ins_cost(1000); 7717 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" 7718 "XOR $tmp2,$tmp2\n\t" 7719 "CMP $tmp,EDX\n\t" 7720 "JA,s fast\n\t" 7721 "MOV $tmp2,EAX\n\t" 7722 "MOV EAX,EDX\n\t" 7723 "MOV EDX,0\n\t" 7724 "JLE,s pos\n\t" 7725 "LNEG EAX : $tmp2\n\t" 7726 "DIV $tmp # unsigned division\n\t" 7727 "XCHG EAX,$tmp2\n\t" 7728 "DIV $tmp\n\t" 7729 "LNEG $tmp2 : EAX\n\t" 7730 "JMP,s done\n" 7731 "pos:\n\t" 7732 "DIV $tmp\n\t" 7733 "XCHG EAX,$tmp2\n" 7734 "fast:\n\t" 7735 "DIV $tmp\n" 7736 "done:\n\t" 7737 "MOV EDX,$tmp2\n\t" 7738 "NEG EDX:EAX # if $imm < 0" %} 7739 ins_encode %{ 7740 int con = (int)$imm$$constant; 7741 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7742 int pcon = (con > 0) ? con : -con; 7743 Label Lfast, Lpos, Ldone; 7744 7745 __ movl($tmp$$Register, pcon); 7746 __ xorl($tmp2$$Register,$tmp2$$Register); 7747 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7748 __ jccb(Assembler::above, Lfast); // result fits into 32 bit 7749 7750 __ movl($tmp2$$Register, $dst$$Register); // save 7751 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7752 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7753 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7754 7755 // Negative dividend. 
7756 // convert value to positive to use unsigned division 7757 __ lneg($dst$$Register, $tmp2$$Register); 7758 __ divl($tmp$$Register); 7759 __ xchgl($dst$$Register, $tmp2$$Register); 7760 __ divl($tmp$$Register); 7761 // revert result back to negative 7762 __ lneg($tmp2$$Register, $dst$$Register); 7763 __ jmpb(Ldone); 7764 7765 __ bind(Lpos); 7766 __ divl($tmp$$Register); // Use unsigned division 7767 __ xchgl($dst$$Register, $tmp2$$Register); 7768 // Fallthrow for final divide, tmp2 has 32 bit hi result 7769 7770 __ bind(Lfast); 7771 // fast path: src is positive 7772 __ divl($tmp$$Register); // Use unsigned division 7773 7774 __ bind(Ldone); 7775 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7776 if (con < 0) { 7777 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7778 } 7779 %} 7780 ins_pipe( pipe_slow ); 7781 %} 7782 7783 // Remainder Register Long (remainder fit into 32 bits) 7784 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7785 match(Set dst (ModL dst imm)); 7786 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7787 ins_cost(1000); 7788 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7789 "CMP $tmp,EDX\n\t" 7790 "JA,s fast\n\t" 7791 "MOV $tmp2,EAX\n\t" 7792 "MOV EAX,EDX\n\t" 7793 "MOV EDX,0\n\t" 7794 "JLE,s pos\n\t" 7795 "LNEG EAX : $tmp2\n\t" 7796 "DIV $tmp # unsigned division\n\t" 7797 "MOV EAX,$tmp2\n\t" 7798 "DIV $tmp\n\t" 7799 "NEG EDX\n\t" 7800 "JMP,s done\n" 7801 "pos:\n\t" 7802 "DIV $tmp\n\t" 7803 "MOV EAX,$tmp2\n" 7804 "fast:\n\t" 7805 "DIV $tmp\n" 7806 "done:\n\t" 7807 "MOV EAX,EDX\n\t" 7808 "SAR EDX,31\n\t" %} 7809 ins_encode %{ 7810 int con = (int)$imm$$constant; 7811 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7812 int pcon = (con > 0) ? 
con : -con;
      // NOTE(review): the lines above/below up to the first full `instruct` are the
      // tail of a long-division/remainder encode block whose head lies before this
      // chunk — reproduced unchanged.
      Label Lfast, Lpos, Ldone;

      __ movl($tmp$$Register, pcon);
      __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
      __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

      __ movl($tmp2$$Register, $dst$$Register); // save
      __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
      __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
      __ jccb(Assembler::lessEqual, Lpos); // result is positive

      // Negative dividend.
      // convert value to positive to use unsigned division
      __ lneg($dst$$Register, $tmp2$$Register);
      __ divl($tmp$$Register);
      __ movl($dst$$Register, $tmp2$$Register);
      __ divl($tmp$$Register);
      // revert remainder back to negative
      __ negl(HIGH_FROM_LOW($dst$$Register));
      __ jmpb(Ldone);

      __ bind(Lpos);
      __ divl($tmp$$Register);
      __ movl($dst$$Register, $tmp2$$Register);

      __ bind(Lfast);
      // fast path: src is positive
      __ divl($tmp$$Register);

      __ bind(Ldone);
      __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
      __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);   // shift instructions clobber the condition codes

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4);  /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable (shift count must be in CL, hence eCXRegI)
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4);  /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one, directly on a memory operand
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary,dst), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  // NOTE(review): register-form instruction routed to ialu_mem_imm pipe —
  // looks like ialu_reg was intended; confirm against pipeline tables.
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate, directly on a memory operand
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary, dst ), Con8or32(shift), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable (count in CL)
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7);  /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5);  /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
// Matched as a single sign-extending byte move (MOVSX).
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
// Matched as a single sign-extending 16-bit move (MOVSX).
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable (count in CL)
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register (read-modify-write)
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate (read-modify-write)
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2, matched from (src1 ^ -1) & src2
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from (0 - src) & src
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to (and including) lowest set bit, matched from (src + -1) ^ src
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, matched from (src + -1) & src
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer reinterpreted as an int (CastP2X)
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register (read-modify-write)
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate (read-modify-write)
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand — encode-only helpers referenced by the rotate match rules below
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0);  /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0);  /*Opcode /C1  /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0);  /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate requires the two shift counts to sum to 32 (mod 32), i.e. a true rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand — encode-only helpers for the ROR match rules below
instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1);  /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Predicate requires the two shift counts to sum to 32 (mod 32), i.e. a true rotate.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1 — strength-reduced to NOT (no flags killed)
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( SetInstMark, OpcP, RegMem(dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register (read-modify-write)
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode(
SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate (read-modify-write)
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Plain register copy; encode-only helper for the Conv2B expansions below.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG/ADC pair: yields 0 when src == 0, nonzero otherwise (encode-only helper).
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer-source variants of the same Conv2B expansion.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// CmpLTMask: dst = (p < q) ? -1 : 0, computed branchlessly.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // Branchless: zero dst, SETcc the low byte from the compare, then negate
    // so 1 becomes the all-ones mask. (Removed an unused 'Label done' local.)
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// CmpLTMask against zero: just smear the sign bit across the register.
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These set the flags register (cr) for a following overflow branch; the
// USE_KILL operands are destroyed by the flag-producing instruction.

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Integer Absolute Instructions
// Branchless abs: tmp = sign mask, dst = (src ^ mask) - mask.
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
  %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);
    __ subl($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

//----------Long Instructions------------------------------------------------
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct
addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8735 match(Set dst (AddL dst src)); 8736 effect(KILL cr); 8737 format %{ "ADD $dst.lo,$src.lo\n\t" 8738 "ADC $dst.hi,$src.hi" %} 8739 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */ 8740 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8741 ins_pipe( ialu_reg_long ); 8742 %} 8743 8744 // Add Long Register with Memory 8745 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8746 match(Set dst (AddL dst (LoadL mem))); 8747 effect(KILL cr); 8748 ins_cost(125); 8749 format %{ "ADD $dst.lo,$mem\n\t" 8750 "ADC $dst.hi,$mem+4" %} 8751 opcode(0x03, 0x13); 8752 ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark ); 8753 ins_pipe( ialu_reg_long_mem ); 8754 %} 8755 8756 // Subtract Long Register with Register. 8757 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8758 match(Set dst (SubL dst src)); 8759 effect(KILL cr); 8760 ins_cost(200); 8761 format %{ "SUB $dst.lo,$src.lo\n\t" 8762 "SBB $dst.hi,$src.hi" %} 8763 opcode(0x2B, 0x1B); 8764 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8765 ins_pipe( ialu_reg_reg_long ); 8766 %} 8767 8768 // Subtract Long Register with Immediate 8769 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8770 match(Set dst (SubL dst src)); 8771 effect(KILL cr); 8772 format %{ "SUB $dst.lo,$src.lo\n\t" 8773 "SBB $dst.hi,$src.hi" %} 8774 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8775 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8776 ins_pipe( ialu_reg_long ); 8777 %} 8778 8779 // Subtract Long Register with Memory 8780 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8781 match(Set dst (SubL dst (LoadL mem))); 8782 effect(KILL cr); 8783 ins_cost(125); 8784 format %{ "SUB $dst.lo,$mem\n\t" 8785 "SBB $dst.hi,$mem+4" %} 8786 opcode(0x2B, 0x1B); 8787 ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), 
ClearInstMark ); 8788 ins_pipe( ialu_reg_long_mem ); 8789 %} 8790 8791 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8792 match(Set dst (SubL zero dst)); 8793 effect(KILL cr); 8794 ins_cost(300); 8795 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8796 ins_encode( neg_long(dst) ); 8797 ins_pipe( ialu_reg_reg_long ); 8798 %} 8799 8800 // And Long Register with Register 8801 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8802 match(Set dst (AndL dst src)); 8803 effect(KILL cr); 8804 format %{ "AND $dst.lo,$src.lo\n\t" 8805 "AND $dst.hi,$src.hi" %} 8806 opcode(0x23,0x23); 8807 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8808 ins_pipe( ialu_reg_reg_long ); 8809 %} 8810 8811 // And Long Register with Immediate 8812 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8813 match(Set dst (AndL dst src)); 8814 effect(KILL cr); 8815 format %{ "AND $dst.lo,$src.lo\n\t" 8816 "AND $dst.hi,$src.hi" %} 8817 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8818 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8819 ins_pipe( ialu_reg_long ); 8820 %} 8821 8822 // And Long Register with Memory 8823 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8824 match(Set dst (AndL dst (LoadL mem))); 8825 effect(KILL cr); 8826 ins_cost(125); 8827 format %{ "AND $dst.lo,$mem\n\t" 8828 "AND $dst.hi,$mem+4" %} 8829 opcode(0x23, 0x23); 8830 ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark ); 8831 ins_pipe( ialu_reg_long_mem ); 8832 %} 8833 8834 // BMI1 instructions 8835 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 8836 match(Set dst (AndL (XorL src1 minus_1) src2)); 8837 predicate(UseBMI1Instructions); 8838 effect(KILL cr, TEMP dst); 8839 8840 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 8841 "ANDNL $dst.hi, $src1.hi, $src2.hi" 8842 %} 8843 8844 ins_encode %{ 8845 Register Rdst = $dst$$Register; 
8846 Register Rsrc1 = $src1$$Register; 8847 Register Rsrc2 = $src2$$Register; 8848 __ andnl(Rdst, Rsrc1, Rsrc2); 8849 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 8850 %} 8851 ins_pipe(ialu_reg_reg_long); 8852 %} 8853 8854 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ 8855 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 8856 predicate(UseBMI1Instructions); 8857 effect(KILL cr, TEMP dst); 8858 8859 ins_cost(125); 8860 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 8861 "ANDNL $dst.hi, $src1.hi, $src2+4" 8862 %} 8863 8864 ins_encode %{ 8865 Register Rdst = $dst$$Register; 8866 Register Rsrc1 = $src1$$Register; 8867 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 8868 8869 __ andnl(Rdst, Rsrc1, $src2$$Address); 8870 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); 8871 %} 8872 ins_pipe(ialu_reg_mem); 8873 %} 8874 8875 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 8876 match(Set dst (AndL (SubL imm_zero src) src)); 8877 predicate(UseBMI1Instructions); 8878 effect(KILL cr, TEMP dst); 8879 8880 format %{ "MOVL $dst.hi, 0\n\t" 8881 "BLSIL $dst.lo, $src.lo\n\t" 8882 "JNZ done\n\t" 8883 "BLSIL $dst.hi, $src.hi\n" 8884 "done:" 8885 %} 8886 8887 ins_encode %{ 8888 Label done; 8889 Register Rdst = $dst$$Register; 8890 Register Rsrc = $src$$Register; 8891 __ movl(HIGH_FROM_LOW(Rdst), 0); 8892 __ blsil(Rdst, Rsrc); 8893 __ jccb(Assembler::notZero, done); 8894 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8895 __ bind(done); 8896 %} 8897 ins_pipe(ialu_reg); 8898 %} 8899 8900 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 8901 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 8902 predicate(UseBMI1Instructions); 8903 effect(KILL cr, TEMP dst); 8904 8905 ins_cost(125); 8906 format %{ "MOVL $dst.hi, 0\n\t" 8907 "BLSIL $dst.lo, 
$src\n\t" 8908 "JNZ done\n\t" 8909 "BLSIL $dst.hi, $src+4\n" 8910 "done:" 8911 %} 8912 8913 ins_encode %{ 8914 Label done; 8915 Register Rdst = $dst$$Register; 8916 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8917 8918 __ movl(HIGH_FROM_LOW(Rdst), 0); 8919 __ blsil(Rdst, $src$$Address); 8920 __ jccb(Assembler::notZero, done); 8921 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 8922 __ bind(done); 8923 %} 8924 ins_pipe(ialu_reg_mem); 8925 %} 8926 8927 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8928 %{ 8929 match(Set dst (XorL (AddL src minus_1) src)); 8930 predicate(UseBMI1Instructions); 8931 effect(KILL cr, TEMP dst); 8932 8933 format %{ "MOVL $dst.hi, 0\n\t" 8934 "BLSMSKL $dst.lo, $src.lo\n\t" 8935 "JNC done\n\t" 8936 "BLSMSKL $dst.hi, $src.hi\n" 8937 "done:" 8938 %} 8939 8940 ins_encode %{ 8941 Label done; 8942 Register Rdst = $dst$$Register; 8943 Register Rsrc = $src$$Register; 8944 __ movl(HIGH_FROM_LOW(Rdst), 0); 8945 __ blsmskl(Rdst, Rsrc); 8946 __ jccb(Assembler::carryClear, done); 8947 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8948 __ bind(done); 8949 %} 8950 8951 ins_pipe(ialu_reg); 8952 %} 8953 8954 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8955 %{ 8956 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 8957 predicate(UseBMI1Instructions); 8958 effect(KILL cr, TEMP dst); 8959 8960 ins_cost(125); 8961 format %{ "MOVL $dst.hi, 0\n\t" 8962 "BLSMSKL $dst.lo, $src\n\t" 8963 "JNC done\n\t" 8964 "BLSMSKL $dst.hi, $src+4\n" 8965 "done:" 8966 %} 8967 8968 ins_encode %{ 8969 Label done; 8970 Register Rdst = $dst$$Register; 8971 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8972 8973 __ movl(HIGH_FROM_LOW(Rdst), 0); 8974 __ blsmskl(Rdst, $src$$Address); 8975 __ jccb(Assembler::carryClear, done); 8976 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 8977 __ 
bind(done); 8978 %} 8979 8980 ins_pipe(ialu_reg_mem); 8981 %} 8982 8983 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8984 %{ 8985 match(Set dst (AndL (AddL src minus_1) src) ); 8986 predicate(UseBMI1Instructions); 8987 effect(KILL cr, TEMP dst); 8988 8989 format %{ "MOVL $dst.hi, $src.hi\n\t" 8990 "BLSRL $dst.lo, $src.lo\n\t" 8991 "JNC done\n\t" 8992 "BLSRL $dst.hi, $src.hi\n" 8993 "done:" 8994 %} 8995 8996 ins_encode %{ 8997 Label done; 8998 Register Rdst = $dst$$Register; 8999 Register Rsrc = $src$$Register; 9000 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9001 __ blsrl(Rdst, Rsrc); 9002 __ jccb(Assembler::carryClear, done); 9003 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9004 __ bind(done); 9005 %} 9006 9007 ins_pipe(ialu_reg); 9008 %} 9009 9010 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9011 %{ 9012 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); 9013 predicate(UseBMI1Instructions); 9014 effect(KILL cr, TEMP dst); 9015 9016 ins_cost(125); 9017 format %{ "MOVL $dst.hi, $src+4\n\t" 9018 "BLSRL $dst.lo, $src\n\t" 9019 "JNC done\n\t" 9020 "BLSRL $dst.hi, $src+4\n" 9021 "done:" 9022 %} 9023 9024 ins_encode %{ 9025 Label done; 9026 Register Rdst = $dst$$Register; 9027 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9028 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9029 __ blsrl(Rdst, $src$$Address); 9030 __ jccb(Assembler::carryClear, done); 9031 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9032 __ bind(done); 9033 %} 9034 9035 ins_pipe(ialu_reg_mem); 9036 %} 9037 9038 // Or Long Register with Register 9039 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9040 match(Set dst (OrL dst src)); 9041 effect(KILL cr); 9042 format %{ "OR $dst.lo,$src.lo\n\t" 9043 "OR $dst.hi,$src.hi" %} 9044 opcode(0x0B,0x0B); 9045 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9046 ins_pipe( ialu_reg_reg_long ); 9047 %} 9048 9049 
// Or Long Register with Immediate
// 32-bit VM: a Java long lives in a register pair, so each 64-bit logical op
// is emitted as two 32-bit ops against the low and high halves.
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// XOR with all-ones is a bitwise NOT; NOT does not write EFLAGS, so no
// eFlagsReg KILL is needed here (unlike the general immediate form below).
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
// Small constant shifts are expanded as ADD/ADC chains: ADD doubles the low
// word and ADC propagates the carry into the high word.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
// SHLD shifts bits from the low word into the high word, then SHL finishes
// the low word.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
// A shift of >= 32 moves the low word into the high word and zeroes the low.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right (logical) Long by 1-31
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right (logical) Long by 32-63
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right (logical) Long by variable
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right arithmetic Long by 1-31
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by 32-63
// "SAR $dst.hi,31" replicates the sign bit across the whole high word.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}


//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// x87 (UseSSE<=1) double subtract: push src onto the FPU stack and subtract
// into dst with a popping DSUB.
instruct subDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DSUB ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


instruct subDPR_reg_mem(regDPR dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// FABS operates on the top-of-stack register only, hence regDPR1 operands.
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// FCHS operates on the top-of-stack register only, hence regDPR1 operands.
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DADD ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DADDp $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D $dst\n\t"
            "DADD ST,$src\n\t"
            "FST_D $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( SetInstMark, Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src), ClearInstMark,
              SetInstMark,
              Opcode(0xDD), RMopc_Mem(0x03,dst),
              ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// Add the constant 1.0 via the dedicated FLD1 instruction (no constant-table
// load needed).
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// General double constant: the predicate excludes 0.0 and 1.0, which have
// cheaper dedicated forms.
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}

instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (MulD dst src));
  ins_cost(1); // Select this instruction for all FP double multiplies
  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}


instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_reg_mem );
%}


// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // Fix: this instruct carried two predicate() clauses (a bare "UseSSE<=1"
  // followed by the combined form). Keep only the combined predicate, which
  // matches the companion strictfp_mulDPR_reg above.
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all FP double divides
               // (was the octal literal "01"; same value, clearer intent)

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// x87 double remainder via the fprem-based helper; clobbers EAX and EFLAGS.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE2 double remainder: bounce the XMM operands through the stack so the
// x87 FPREM loop can compute the IEEE remainder, then move the result back.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
          "\tMOVSD [ESP+0],$src1\n"
          "\tFLD_D [ESP+0]\n"
          "\tMOVSD [ESP+0],$src0\n"
          "\tFLD_D [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD $dst,[ESP+0]\n"
          "\tADD ESP,8\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}

//-------------Float Instructions-------------------------------
// Float Math

// Code for float compare:
//     fcompp();
//     fwait(); fnstsw_ax();
//     sahf();
//     movl(dst, unordered_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);
//   exit:

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Compare two XMM single floats, setting unsigned-compare condition codes in
// EFLAGS.  A NaN operand sets PF; emit_cmpfp_fixup() then rewrites the flags
// (the PUSHF/AND/POPF sequence shown in the format) so unordered compares
// fall out as "less than".
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe( pipe_slow );
%}

// Cheaper compare used when the flags consumer tolerates the raw UCOMISS
// result (eFlagsRegUCF): no NaN fixup sequence is emitted.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpF_cc: second operand folded from a LoadF.
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand form of cmpF_ccCF (no NaN fixup).
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
// Three-way float compare (CmpF3) materialized into an int register;
// emit_cmpfp3() produces the -1/0/1 from the flags (NaN maps to -1).
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
// x87 subtract; the store to a stack slot forces rounding to float precision.
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// x87 absolute value; operates on the top-of-stack register (regFPR1).
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// x87 negate; operates on the top-of-stack register (regFPR1).
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark);
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark);
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
// Add of a float constant loaded from the constant table.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// NOTE(review): the format text says FSTP_S but the result is popped to a
// register (Pop_Reg_FPR) -- debug-listing text only; confirm against upstream.
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
// Multiply by a float constant loaded from the constant table.
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
// Float remainder via the x87 FPREM helper; clobbers EAX and EFLAGS.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder: bounce the XMM operands through the stack onto the
// x87 stack, loop on FPREM until complete, then move the result back.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!

// Round an x87 float to a stack slot (forces float precision).
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round an x87 double to a stack slot (forces double precision).
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
// x87 source, XMM destination: round through a stack temporary.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// x87 float to double widening (no rounding needed).
// NOTE(review): format text reads FST_S despite the D-round tag -- listing
// text only; confirm against upstream before changing.
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// XMM float source, x87 double destination: widen through a stack temporary.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 double to long; slow path calls d2l_wrapper on overflow/NaN sentinel.
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 float to long; slow path calls d2l_wrapper on overflow/NaN sentinel.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Int to x87 double via FILD from a stack slot.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  // Matches only ConvI2F whose input is (AndI x 255), i.e. a byte value.
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst), ClearInstMark);
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F ));
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy to both halves, arithmetic-shift the high one.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Long to x87 double: push both halves and FILD a 64-bit integer.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Long to XMM double: FILD on the x87 stack, then move through memory.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Long to int: just copy the low half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Reinterpret float bits (already spilled to the stack) as an int.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE: this instruct continues beyond the end of this chunk.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src
); 11160 11161 ins_cost(100); 11162 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} 11163 ins_encode %{ 11164 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11165 %} 11166 ins_pipe( ialu_mem_reg ); 11167 %} 11168 11169 11170 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11171 predicate(UseSSE==0); 11172 match(Set dst (MoveI2F src)); 11173 effect(DEF dst, USE src); 11174 11175 ins_cost(125); 11176 format %{ "FLD_S $src\n\t" 11177 "FSTP $dst\t# MoveI2F_stack_reg" %} 11178 opcode(0xD9); /* D9 /0, FLD m32real */ 11179 ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src), 11180 Pop_Reg_FPR(dst), ClearInstMark ); 11181 ins_pipe( fpu_reg_mem ); 11182 %} 11183 11184 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11185 predicate(UseSSE>=1); 11186 match(Set dst (MoveI2F src)); 11187 effect( DEF dst, USE src ); 11188 11189 ins_cost(95); 11190 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11191 ins_encode %{ 11192 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11193 %} 11194 ins_pipe( pipe_slow ); 11195 %} 11196 11197 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11198 predicate(UseSSE>=2); 11199 match(Set dst (MoveI2F src)); 11200 effect( DEF dst, USE src ); 11201 11202 ins_cost(85); 11203 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11204 ins_encode %{ 11205 __ movdl($dst$$XMMRegister, $src$$Register); 11206 %} 11207 ins_pipe( pipe_slow ); 11208 %} 11209 11210 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11211 match(Set dst (MoveD2L src)); 11212 effect(DEF dst, USE src); 11213 11214 ins_cost(250); 11215 format %{ "MOV $dst.lo,$src\n\t" 11216 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11217 opcode(0x8B, 0x8B); 11218 ins_encode( SetInstMark, OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src), ClearInstMark); 11219 ins_pipe( ialu_mem_long_reg ); 11220 %} 11221 11222 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11223 predicate(UseSSE<=1); 11224 match(Set dst (MoveD2L src)); 11225 
effect(DEF dst, USE src); 11226 11227 ins_cost(125); 11228 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11229 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11230 ins_pipe( fpu_mem_reg ); 11231 %} 11232 11233 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11234 predicate(UseSSE>=2); 11235 match(Set dst (MoveD2L src)); 11236 effect(DEF dst, USE src); 11237 ins_cost(95); 11238 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11239 ins_encode %{ 11240 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11241 %} 11242 ins_pipe( pipe_slow ); 11243 %} 11244 11245 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11246 predicate(UseSSE>=2); 11247 match(Set dst (MoveD2L src)); 11248 effect(DEF dst, USE src, TEMP tmp); 11249 ins_cost(85); 11250 format %{ "MOVD $dst.lo,$src\n\t" 11251 "PSHUFLW $tmp,$src,0x4E\n\t" 11252 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11253 ins_encode %{ 11254 __ movdl($dst$$Register, $src$$XMMRegister); 11255 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11256 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11257 %} 11258 ins_pipe( pipe_slow ); 11259 %} 11260 11261 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11262 match(Set dst (MoveL2D src)); 11263 effect(DEF dst, USE src); 11264 11265 ins_cost(200); 11266 format %{ "MOV $dst,$src.lo\n\t" 11267 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11268 opcode(0x89, 0x89); 11269 ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark ); 11270 ins_pipe( ialu_mem_long_reg ); 11271 %} 11272 11273 11274 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11275 predicate(UseSSE<=1); 11276 match(Set dst (MoveL2D src)); 11277 effect(DEF dst, USE src); 11278 ins_cost(125); 11279 11280 format %{ "FLD_D $src\n\t" 11281 "FSTP $dst\t# MoveL2D_stack_reg" %} 11282 opcode(0xDD); /* DD /0, FLD m64real */ 11283 ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src), 11284 Pop_Reg_DPR(dst), 
ClearInstMark ); 11285 ins_pipe( fpu_reg_mem ); 11286 %} 11287 11288 11289 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11290 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 11291 match(Set dst (MoveL2D src)); 11292 effect(DEF dst, USE src); 11293 11294 ins_cost(95); 11295 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11296 ins_encode %{ 11297 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11298 %} 11299 ins_pipe( pipe_slow ); 11300 %} 11301 11302 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11303 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11304 match(Set dst (MoveL2D src)); 11305 effect(DEF dst, USE src); 11306 11307 ins_cost(95); 11308 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11309 ins_encode %{ 11310 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11311 %} 11312 ins_pipe( pipe_slow ); 11313 %} 11314 11315 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11316 predicate(UseSSE>=2); 11317 match(Set dst (MoveL2D src)); 11318 effect(TEMP dst, USE src, TEMP tmp); 11319 ins_cost(85); 11320 format %{ "MOVD $dst,$src.lo\n\t" 11321 "MOVD $tmp,$src.hi\n\t" 11322 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11323 ins_encode %{ 11324 __ movdl($dst$$XMMRegister, $src$$Register); 11325 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11326 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11327 %} 11328 ins_pipe( pipe_slow ); 11329 %} 11330 11331 //----------------------------- CompressBits/ExpandBits ------------------------ 11332 11333 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ 11334 predicate(n->bottom_type()->isa_long()); 11335 match(Set dst (CompressBits src mask)); 11336 effect(TEMP rtmp, TEMP xtmp, KILL cr); 11337 format %{ "compress_bits $dst, $src, $mask\t! 
using $rtmp and $xtmp as TEMP" %} 11338 ins_encode %{ 11339 Label exit, partail_result; 11340 // Parallely extract both upper and lower 32 bits of source into destination register pair. 11341 // Merge the results of upper and lower destination registers such that upper destination 11342 // results are contiguously laid out after the lower destination result. 11343 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 11344 __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); 11345 __ popcntl($rtmp$$Register, $mask$$Register); 11346 // Skip merging if bit count of lower mask register is equal to 32 (register size). 11347 __ cmpl($rtmp$$Register, 32); 11348 __ jccb(Assembler::equal, exit); 11349 // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. 11350 __ movdl($xtmp$$XMMRegister, $rtmp$$Register); 11351 // Shift left the contents of upper destination register by true bit count of lower mask register 11352 // and merge with lower destination register. 11353 __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11354 __ orl($dst$$Register, $rtmp$$Register); 11355 __ movdl($rtmp$$Register, $xtmp$$XMMRegister); 11356 // Zero out upper destination register if true bit count of lower 32 bit mask is zero 11357 // since contents of upper destination have already been copied to lower destination 11358 // register. 11359 __ cmpl($rtmp$$Register, 0); 11360 __ jccb(Assembler::greater, partail_result); 11361 __ movl(HIGH_FROM_LOW($dst$$Register), 0); 11362 __ jmp(exit); 11363 __ bind(partail_result); 11364 // Perform right shift over upper destination register to move out bits already copied 11365 // to lower destination register. 
11366 __ subl($rtmp$$Register, 32); 11367 __ negl($rtmp$$Register); 11368 __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11369 __ bind(exit); 11370 %} 11371 ins_pipe( pipe_slow ); 11372 %} 11373 11374 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ 11375 predicate(n->bottom_type()->isa_long()); 11376 match(Set dst (ExpandBits src mask)); 11377 effect(TEMP rtmp, TEMP xtmp, KILL cr); 11378 format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %} 11379 ins_encode %{ 11380 // Extraction operation sequentially reads the bits from source register starting from LSB 11381 // and lays them out into destination register at bit locations corresponding to true bits 11382 // in mask register. Thus number of source bits read are equal to combined true bit count 11383 // of mask register pair. 11384 Label exit, mask_clipping; 11385 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 11386 __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); 11387 __ popcntl($rtmp$$Register, $mask$$Register); 11388 // If true bit count of lower mask register is 32 then none of bit of lower source register 11389 // will feed to upper destination register. 11390 __ cmpl($rtmp$$Register, 32); 11391 __ jccb(Assembler::equal, exit); 11392 // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. 11393 __ movdl($xtmp$$XMMRegister, $rtmp$$Register); 11394 // Shift right the contents of lower source register to remove already consumed bits. 11395 __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register); 11396 // Extract the bits from lower source register starting from LSB under the influence 11397 // of upper mask register. 
11398 __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register)); 11399 __ movdl($rtmp$$Register, $xtmp$$XMMRegister); 11400 __ subl($rtmp$$Register, 32); 11401 __ negl($rtmp$$Register); 11402 __ movdl($xtmp$$XMMRegister, $mask$$Register); 11403 __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register)); 11404 // Clear the set bits in upper mask register which have been used to extract the contents 11405 // from lower source register. 11406 __ bind(mask_clipping); 11407 __ blsrl($mask$$Register, $mask$$Register); 11408 __ decrementl($rtmp$$Register, 1); 11409 __ jccb(Assembler::greater, mask_clipping); 11410 // Starting from LSB extract the bits from upper source register under the influence of 11411 // remaining set bits in upper mask register. 11412 __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register); 11413 // Merge the partial results extracted from lower and upper source register bits. 11414 __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11415 __ movdl($mask$$Register, $xtmp$$XMMRegister); 11416 __ bind(exit); 11417 %} 11418 ins_pipe( pipe_slow ); 11419 %} 11420 11421 // ======================================================================= 11422 // Fast clearing of an array 11423 // Small non-constant length ClearArray for non-AVX512 targets. 
// ClearArray, small (non-large) non-constant length, pre-AVX512.
// cnt is in doublewords; the emitted code either uses a short store loop or,
// above InitArrayShortSize, one of the REP STOS / XMM bulk-clear paths.
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  // No AVX512: pass knoreg instead of a real mask register.
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Small non-constant length ClearArray for AVX512 targets.
// Same shape as rep_stos, but supplies a real opmask register (ktmp).
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Large non-constant length ClearArray for non-AVX512 targets.
// is_large(): no short-loop fast path; goes straight to the bulk clear
// (clear_mem called with large=true).
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Small constant length ClearArray for AVX512 targets.
// Compile-time count ($cnt$$constant) allows the masked-store clear_mem overload.
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// String compare intrinsics. The LL/UU/LU/UL suffix is the StrIntrinsicNode
// encoding of the two operands (presumably Latin1 vs UTF-16 -- confirm in
// StrIntrinsicNode). The _evex variants require avx512vlbw and pass a real
// opmask register (ktmp); the plain variants pass knoreg.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// UL variants: note the str/cnt operands are passed to string_compare in
// swapped order (str2 first) relative to the LU variants.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // arrays_equals with is_array_equ=false implements StrEquals.
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe( pipe_slow );
%}

// AVX512 variant of string_equals; uses opmask register ktmp.
instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}


// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// UU variant: 16-bit elements, so the C8 threshold is 8 elements.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// fast search of substring with known (constant) size, Latin-1 haystack /
// UTF-16 needle (UL encoding).
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Substring search with a runtime substring length; (-1) tells the stub
// the needle length is not a compile-time constant.  Latin-1/Latin-1.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// Runtime-length substring search, UTF-16/UTF-16.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// Runtime-length substring search, Latin-1 haystack / UTF-16 needle.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Single-character indexOf in a UTF-16 string.
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                             eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Single-character indexOf in a Latin-1 string.
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// fast array equals, byte[] elements (LL), non-EVEX path.
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    // First argument 'true' = array-equals flavor (lengths read from the
    // array headers, hence no explicit cnt operand).
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX (AVX-512 VL+BW) variant of array_equalsB.
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals, char[] elements (UU), non-EVEX path.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX (AVX-512 VL+BW) variant of array_equalsC.
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Count leading non-negative bytes of a byte[], non-EVEX path.
// Note the predicate also requires BMI2 for the EVEX rule, so this one
// matches when either feature is missing.
instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                         regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX variant of count_positives; needs both AVX-512 VL+BW and BMI2,
// and uses two opmask temporaries.
instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
                              regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}


// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                         regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX (AVX-512 VL+BW + BMI2) variant of string_compress.
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                              regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX (AVX-512 VL+BW + BMI2) variant of string_inflate.
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                             regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    // Final 'false' argument selects ISO-8859-1 (not ASCII) encoding.
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ASCII
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                            regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                            eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    // Final 'true' argument selects strict ASCII encoding.
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions

// Signed 32-bit register-register compare: sets EFLAGS for a later Jcc/CMOV.
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Signed compare of a register with an immediate.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  // OpcSErm picks the sign-extended 8-bit form when the immediate fits.
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero is done as TEST reg,reg (shorter, same flags).
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & con) == 0 folded into a single TEST with immediate.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & mem) == 0 folded into a TEST against memory.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // NOTE(review): unlike compU_eReg_imm this brackets the encoding with
  // SetInstMark/ClearInstMark -- presumably because a pointer immediate
  // may carry relocation info; verify against the ADLC encoding classes.
  ins_encode( SetInstMark, OpcSErm( op1, op2 ), Con8or32( op2 ), ClearInstMark );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  // Restrict to non-relocatable (raw) pointers, per the comment above.
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// Pointer null-check: TEST reg,reg (signed flags -- fine for eq/neq only,
// see the comment above this instruct).
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  // TEST mem,-1: sets ZF iff the loaded pointer is null.
  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF), ClearInstMark );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);  // placeholder opcode; real bytes come from min_enc
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);  // placeholder opcode; real bytes come from max_enc
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    // Computes the formula in the format string using 64-bit arithmetic
    // in the EAX:EDX pair (hence the fixed eAXRegI/eDXRegI operands,
    // which CDQ/IDIV/MUL implicitly require).
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      // Negative stride: bias toward zero, then negate the 64-bit value
      // so the division below is done on a positive stride.
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index), noreg);
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);  // JMP rel32: opcode byte + 32-bit displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);  // Jcc rel32: 0x0F 0x8x + 32-bit displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Unordered-compare flags (UCF2): must also account for the parity flag,
// which FP compares set on an unordered (NaN) result.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // Unordered (PF set) counts as not-equal: branch either way.
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Unordered must NOT count as equal: skip over the equal branch.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Flags-only variant: matched when the check result is immediately
// compared against null, so EDI need not be zeroed on a hit.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx,
                                      eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.
// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2); // short form: opcode byte + 8-bit displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1); // short replacement for the long jmpDir variant
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short conditional jump on flags from an unordered (UCF) compare;
// a single jccb suffices for this condition-code class.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short variant of the two-jump UCF2 form: eq/ne tests must also consult
// the parity flag (PF set on unordered), hence two short jumps.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4); // two 2-byte short jumps
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // not-equal: unordered (PF) also branches to the target
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // equal: unordered must fall through, so hop over the equal-jump
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    // Produces -1 / 0 / +1 in $dst: high halves decide via a signed
    // compare; on equal highs the low halves are compared unsigned.
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
12830 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{ 12831 match( Set flags (CmpL src zero )); 12832 ins_cost(100); 12833 format %{ "TEST $src.hi,$src.hi" %} 12834 opcode(0x85); 12835 ins_encode( OpcP, RegReg_Hi2( src, src ) ); 12836 ins_pipe( ialu_cr_reg_reg ); 12837 %} 12838 12839 // Manifest a CmpL result in the normal flags. Only good for LT or GE 12840 // compares. Can be used for LE or GT compares by reversing arguments. 12841 // NOT GOOD FOR EQ/NE tests. 12842 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{ 12843 match( Set flags (CmpL src1 src2 )); 12844 effect( TEMP tmp ); 12845 ins_cost(300); 12846 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" 12847 "MOV $tmp,$src1.hi\n\t" 12848 "SBB $tmp,$src2.hi\t! Compute flags for long compare" %} 12849 ins_encode( long_cmp_flags2( src1, src2, tmp ) ); 12850 ins_pipe( ialu_cr_reg_reg ); 12851 %} 12852 12853 // Long compares reg < zero/req OR reg >= zero/req. 12854 // Just a wrapper for a normal branch, plus the predicate test. 12855 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{ 12856 match(If cmp flags); 12857 effect(USE labl); 12858 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12859 expand %{ 12860 jmpCon(cmp,flags,labl); // JLT or JGE... 12861 %} 12862 %} 12863 12864 //====== 12865 // Manifest a CmpUL result in the normal flags. Only good for LT or GE 12866 // compares. Can be used for LE or GT compares by reversing arguments. 12867 // NOT GOOD FOR EQ/NE tests. 
12868 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{ 12869 match(Set flags (CmpUL src zero)); 12870 ins_cost(100); 12871 format %{ "TEST $src.hi,$src.hi" %} 12872 opcode(0x85); 12873 ins_encode(OpcP, RegReg_Hi2(src, src)); 12874 ins_pipe(ialu_cr_reg_reg); 12875 %} 12876 12877 // Manifest a CmpUL result in the normal flags. Only good for LT or GE 12878 // compares. Can be used for LE or GT compares by reversing arguments. 12879 // NOT GOOD FOR EQ/NE tests. 12880 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{ 12881 match(Set flags (CmpUL src1 src2)); 12882 effect(TEMP tmp); 12883 ins_cost(300); 12884 format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t" 12885 "MOV $tmp,$src1.hi\n\t" 12886 "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %} 12887 ins_encode(long_cmp_flags2(src1, src2, tmp)); 12888 ins_pipe(ialu_cr_reg_reg); 12889 %} 12890 12891 // Unsigned long compares reg < zero/req OR reg >= zero/req. 12892 // Just a wrapper for a normal branch, plus the predicate test. 12893 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{ 12894 match(If cmp flags); 12895 effect(USE labl); 12896 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); 12897 expand %{ 12898 jmpCon(cmp, flags, labl); // JLT or JGE... 12899 %} 12900 %} 12901 12902 // Compare 2 longs and CMOVE longs. 
// CMOVE a long (both halves) on flags produced by an LTGE-style long compare.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40); // CMOVcc r32,r/m32
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source form of the above.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags variant: same encoding, delegated via expand.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp, flags, dst, src);
  %}
%}

// Unsigned-flags variant of the memory-source form.
instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source form.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags variant: delegates to the signed encoding.
instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LTGE(cmp, flags, dst, src);
  %}
%}

// Unsigned-flags variant of the memory-source form.
instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LTGE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 register file, UseSSE<=1).
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM register file, UseSSE>=2).
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 register file, UseSSE==0).
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM register file, UseSSE>=1).
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp); // holds the OR of both halves
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300); // low-half compare plus conditional high-half compare
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only the eq/ne tests are valid on EQNE-style flags.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp); // holds the OR of both halves
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
13102 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{ 13103 match(If cmp flags); 13104 effect(USE labl); 13105 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne); 13106 expand %{ 13107 jmpCon(cmp, flags, labl); // JEQ or JNE... 13108 %} 13109 %} 13110 13111 // Compare 2 longs and CMOVE longs. 13112 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{ 13113 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 13114 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13115 ins_cost(400); 13116 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13117 "CMOV$cmp $dst.hi,$src.hi" %} 13118 opcode(0x0F,0x40); 13119 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 13120 ins_pipe( pipe_cmov_reg_long ); 13121 %} 13122 13123 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{ 13124 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 13125 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13126 ins_cost(500); 13127 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13128 "CMOV$cmp $dst.hi,$src.hi" %} 13129 opcode(0x0F,0x40); 13130 ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark ); 13131 ins_pipe( pipe_cmov_reg_long ); 13132 %} 13133 13134 // Compare 2 longs and CMOVE ints. 
// CMOVE an int on flags produced by an EQNE-style long compare.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40); // CMOVcc r32,r/m32
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source form.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags variant: delegates to the signed encoding.
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_EQNE(cmp, flags, dst, src);
  %}
%}

// Unsigned-flags variant of the memory-source form.
instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_EQNE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_EQNE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 register file, UseSSE<=1).
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM register file, UseSSE>=2).
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 register file, UseSSE==0).
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM register file, UseSSE>=1).
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp ); // zeroed, then used as the 0 operand of the commuted CMP/SBB
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) ); // note swapped src order
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only the gt/le tests are valid on LEGT-style (commuted) flags.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp); // zeroed, then used as the 0 operand of the commuted CMP/SBB
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp)); // note swapped src order
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source form of the above.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags variant: delegates to the signed encoding.
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LEGT(cmp, flags, dst, src);
  %}
%}

// Unsigned-flags variant of the memory-source form.
instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
// Conditionally move a 32-bit int based on a commuted LE/GT long compare.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source form of cmovII_reg_LEGT (CMOV with a memory operand).
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags variant: expands to cmovII_reg_LEGT.
instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LEGT(cmp, flags, dst, src);
  %}
%}

// Unsigned-flags variant of the memory form; expands to cmovII_mem_LEGT.
instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
// Expands to cmovPP_reg_LEGT (same CMOV; condition comes from $cmp).
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LEGT(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// x87 (pre-SSE2) double form: expands to an FCMOV-based rule.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// SSE2 (XMM) double form.
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// x87 (no SSE) float form of the conditional move.
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


// SSE (XMM) float form of the conditional move.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
13476 instruct CallDynamicJavaDirect(method meth) %{ 13477 match(CallDynamicJava); 13478 effect(USE meth); 13479 13480 ins_cost(300); 13481 format %{ "MOV EAX,(oop)-1\n\t" 13482 "CALL,dynamic" %} 13483 opcode(0xE8); /* E8 cd */ 13484 ins_encode( pre_call_resets, 13485 Java_Dynamic_Call( meth ), 13486 call_epilog, 13487 post_call_FPU ); 13488 ins_pipe( pipe_slow ); 13489 ins_alignment(4); 13490 %} 13491 13492 // Call Runtime Instruction 13493 instruct CallRuntimeDirect(method meth) %{ 13494 match(CallRuntime ); 13495 effect(USE meth); 13496 13497 ins_cost(300); 13498 format %{ "CALL,runtime " %} 13499 opcode(0xE8); /* E8 cd */ 13500 // Use FFREEs to clear entries in float stack 13501 ins_encode( pre_call_resets, 13502 FFree_Float_Stack_All, 13503 Java_To_Runtime( meth ), 13504 post_call_FPU ); 13505 ins_pipe( pipe_slow ); 13506 %} 13507 13508 // Call runtime without safepoint 13509 instruct CallLeafDirect(method meth) %{ 13510 match(CallLeaf); 13511 effect(USE meth); 13512 13513 ins_cost(300); 13514 format %{ "CALL_LEAF,runtime " %} 13515 opcode(0xE8); /* E8 cd */ 13516 ins_encode( pre_call_resets, 13517 FFree_Float_Stack_All, 13518 Java_To_Runtime( meth ), 13519 Verify_FPU_For_Leaf, post_call_FPU ); 13520 ins_pipe( pipe_slow ); 13521 %} 13522 13523 instruct CallLeafNoFPDirect(method meth) %{ 13524 match(CallLeafNoFP); 13525 effect(USE meth); 13526 13527 ins_cost(300); 13528 format %{ "CALL_LEAF_NOFP,runtime " %} 13529 opcode(0xE8); /* E8 cd */ 13530 ins_encode(pre_call_resets, Java_To_Runtime(meth)); 13531 ins_pipe( pipe_slow ); 13532 %} 13533 13534 13535 // Return Instruction 13536 // Remove the return address & jump to it. 13537 instruct Ret() %{ 13538 match(Return); 13539 format %{ "RET" %} 13540 opcode(0xC3); 13541 ins_encode(OpcP); 13542 ins_pipe( pipe_jmp ); 13543 %} 13544 13545 // Tail Call; Jump from runtime stub to Java code. 13546 // Also known as an 'interprocedural jump'. 13547 // Target of jump will eventually return to caller. 
13548 // TailJump below removes the return address. 13549 // Don't use ebp for 'jump_target' because a MachEpilogNode has already been 13550 // emitted just above the TailCall which has reset ebp to the caller state. 13551 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{ 13552 match(TailCall jump_target method_ptr); 13553 ins_cost(300); 13554 format %{ "JMP $jump_target \t# EBX holds method" %} 13555 opcode(0xFF, 0x4); /* Opcode FF /4 */ 13556 ins_encode( OpcP, RegOpc(jump_target) ); 13557 ins_pipe( pipe_jmp ); 13558 %} 13559 13560 13561 // Tail Jump; remove the return address; jump to target. 13562 // TailCall above leaves the return address around. 13563 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{ 13564 match( TailJump jump_target ex_oop ); 13565 ins_cost(300); 13566 format %{ "POP EDX\t# pop return address into dummy\n\t" 13567 "JMP $jump_target " %} 13568 opcode(0xFF, 0x4); /* Opcode FF /4 */ 13569 ins_encode( enc_pop_rdx, 13570 OpcP, RegOpc(jump_target) ); 13571 ins_pipe( pipe_jmp ); 13572 %} 13573 13574 // Forward exception. 13575 instruct ForwardExceptionjmp() 13576 %{ 13577 match(ForwardException); 13578 13579 format %{ "JMP forward_exception_stub" %} 13580 ins_encode %{ 13581 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg); 13582 %} 13583 ins_pipe(pipe_jmp); 13584 %} 13585 13586 // Create exception oop: created by stack-crawling runtime code. 13587 // Created exception is now available to this handler, and is setup 13588 // just prior to jumping to this handler. No code emitted. 13589 instruct CreateException( eAXRegP ex_oop ) 13590 %{ 13591 match(Set ex_oop (CreateEx)); 13592 13593 size(0); 13594 // use the following format syntax 13595 format %{ "# exception oop is in EAX; no code emitted" %} 13596 ins_encode(); 13597 ins_pipe( empty ); 13598 %} 13599 13600 13601 // Rethrow exception: 13602 // The exception oop will come in the first argument position. 
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP    rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Legacy (non-lightweight) fast lock. Sets the flags register; clobbers
// $box (USE_KILL) and the temps listed in effect().
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, $thread$$Register, nullptr);
  %}
  ins_pipe(pipe_slow);
%}

// Legacy (non-lightweight) fast unlock; $box must be in EAX and is killed.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Lightweight-locking fast lock (LockingMode == LM_LIGHTWEIGHT).
instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Lightweight-locking fast unlock; the box operand is EAX and is killed.
instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object eax_reg));
  effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast a GP register into an AVX-512 mask register, vector length <= 32.
// NOTE(review): the format text says "LE32" while the instruct is named
// "LT32"; the predicate is <= 32 — confirm which spelling is intended.
instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// MaskAll for vector length > 32; needs a temporary mask register.
instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// Same as above with a 32-bit integer source register.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Safepoint Instruction
// Polls the thread-local safepoint page with a TEST; the guarantee below
// checks the emitted opcode is 0x85 (test r32, r/m32) so the signal
// handler can recognize the poll instruction.
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ set_inst_mark();
    __ relocate(relocInfo::poll_type);
    __ clear_inst_mark();
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV    $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// A load (0) preceded by a store (1) of the same register to the same
// address is redundant; the pair is replaced by just the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.