//
// Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
// Each double-width FPR is described as an L (low) / H (high) half pair
// sharing one encoding number; the H half maps to the VMReg "next" slot.
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI)
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
reg_class ebpd_reg( EBP,EDI );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instruction out of Intel's FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

// Shorthand so ADLC-emitted encodings can use MacroAssembler syntax.
#define __ masm->

// How to find the high register of a Long pair, given the low register.
// The long pairs declared above are EDX:EAX (enc 0/2), EBX:ECX (enc 1/3)
// and EDI:EBP (enc 5/7), so the high half is always low-encoding + 2.
#define HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
#define HIGH_FROM_LOW_ENC(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

void reg_mask_init() {}

// Note: 'double' and 'long long' have 32-bits alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions. Note this rounds 'adr'
  // DOWN, so the caller must pass a pointer with spare space below it.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool pointer is a 16-byte-aligned slot inside fp_signmask_pool
// (double_quadword rounds down, hence the extra leading quadword above).
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted just before a call for FPU/AVX state resets:
// 6 bytes for fldcw when compiling in 24-bit fp mode, 3 for vzeroupper.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size of the FFree_Float_Stack_All sequence; filled in at first emission,
// -1 until then (asserted below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1;                      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5;                      // skip MOV instruction
  current_offset += 1;                      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M-shaped byte: f1 = mod (2 bits), f2 = reg/opcode-extension
// (3 bits), f3 = r/m (3 bits). Also used for SIB bytes (scale/index/base).
void emit_rm(C2_MacroAssembler *masm, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  __ emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with a condition-code field OR'd in.
void emit_cc(C2_MacroAssembler *masm, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  __ emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(C2_MacroAssembler *masm, int code) {
  __ emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(C2_MacroAssembler *masm, int code, relocInfo::relocType reloc, int offset = 0) {
  __ relocate(__ inst_mark() + offset, reloc);
  emit_opcode(masm, code);
}

// EMIT_D8()
void emit_d8(C2_MacroAssembler *masm, int d8) {
  __ emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(C2_MacroAssembler *masm, int d16) {
  __ emit_int16(d16);
}

// EMIT_D32()
void emit_d32(C2_MacroAssembler *masm, int d32) {
  __ emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(C2_MacroAssembler *masm, int d32, relocInfo::relocType reloc,
        int format) {
  __ relocate(__ inst_mark(), reloc, format);
  __ emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(C2_MacroAssembler *masm, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Embedded oops must be real oops (or null / the non-oop sentinel).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  __ relocate(__ inst_mark(), rspec, format);
  __ emit_int32(d32);
}

// Access stack slot for load or store.
// Emits the ModR/M + SIB + displacement for [ESP+disp], choosing the
// short (8-bit) displacement form when it fits.
void store_to_stackslot(C2_MacroAssembler *masm, int opcode, int rm_field, int disp) {
  emit_opcode( masm, opcode );                // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( masm, 0x01, rm_field, ESP_enc ); // R/M byte
    emit_rm( masm, 0x00, ESP_enc, ESP_enc);   // SIB byte
    emit_d8 (masm, disp);                     // Displacement // R/M byte
  } else {
    emit_rm( masm, 0x02, rm_field, ESP_enc ); // R/M byte
    emit_rm( masm, 0x00, ESP_enc, ESP_enc);   // SIB byte
    emit_d32(masm, disp);                     // Displacement // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModR/M (and SIB/displacement, as needed) bytes for a
// register+memory operand. index == 0x4 means "no index register";
// base == -1 means absolute 32-bit address.
void encode_RegMem( C2_MacroAssembler *masm, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(masm, 0x0, reg_encoding, base);
    }
    else {    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(masm, 0x1, reg_encoding, base);
        emit_d8(masm, displace);
      }
      else {            // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(masm, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(masm, displace, disp_reloc, 1);
          } else {
            emit_d32      (masm, displace);
          }
        }
        else {            // Normal base + offset
          emit_rm(masm, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(masm, displace, disp_reloc, 1);
          } else {
            emit_d32      (masm, displace);
          }
        }
      }
    }
  }
  else {    // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(masm, 0x0, reg_encoding, 0x4);
      emit_rm(masm, scale, index, base);
    }
    else {    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(masm, 0x1, reg_encoding, 0x4);
        emit_rm(masm, scale, index, base);
        emit_d8(masm, displace);
      }
      else {            // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(masm, 0x2, reg_encoding, 0x4);
          emit_rm(masm, scale, index, 0x04);
        } else {
          emit_rm(masm, 0x2, reg_encoding, 0x4);
          emit_rm(masm, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(masm, displace, disp_reloc, 1);
        } else {
          emit_d32      (masm, displace);
        }
      }
    }
  }
}


// Emit a register-to-register move (MOV r32, r/m32); a self-move emits
// nothing at all.
void encode_Copy( C2_MacroAssembler *masm, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, dst_encoding, src_encoding );
  }
}

// After a comiss/ucomiss, rewrite the flags for unordered (NaN) results so
// that they read as 'less than'. No-op when PF is clear (ordered compare).
void emit_cmpfp_fixup(MacroAssembler* masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst':
// -1 if below or unordered (NaN), 0 if equal, +1 if above.
static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog that MachPrologNode::emit produces (debug builds).
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  // Frame setup (and verified-entry checks) are done by the assembler.
  __ verified_entry(C);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog that MachEpilogNode::emit produces (debug builds).
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL rsp, poll_offset[thread] \n\t"
              "JA #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(masm, 0x81); // add  SP, #framesize (imm32 form)
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d32(masm, framesize);
  } else if (framesize) {
    emit_opcode(masm, 0x83); // add  SP, #framesize (imm8 form)
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, framesize);
  }

  emit_opcode(masm, 0x58 | EBP_enc); // POP EBP (0x58 + reg encoding)

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Register a real safepoint-poll stub only for the real emission pass.
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ set_inst_mark();
    __ relocate(relocInfo::poll_return_type);
    __ clear_inst_mark();
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//============================================================================= 730 731 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack }; 732 static enum RC rc_class( OptoReg::Name reg ) { 733 734 if( !OptoReg::is_valid(reg) ) return rc_bad; 735 if (OptoReg::is_stack(reg)) return rc_stack; 736 737 VMReg r = OptoReg::as_VMReg(reg); 738 if (r->is_Register()) return rc_int; 739 if (r->is_FloatRegister()) { 740 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 741 return rc_float; 742 } 743 if (r->is_KRegister()) return rc_kreg; 744 assert(r->is_XMMRegister(), "must be"); 745 return rc_xmm; 746 } 747 748 static int impl_helper( C2_MacroAssembler *masm, bool do_size, bool is_load, int offset, int reg, 749 int opcode, const char *op_str, int size, outputStream* st ) { 750 if( masm ) { 751 masm->set_inst_mark(); 752 emit_opcode (masm, opcode ); 753 encode_RegMem(masm, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); 754 masm->clear_inst_mark(); 755 #ifndef PRODUCT 756 } else if( !do_size ) { 757 if( size != 0 ) st->print("\n\t"); 758 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 759 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 760 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 761 } else { // FLD, FST, PUSH, POP 762 st->print("%s [ESP + #%d]",op_str,offset); 763 } 764 #endif 765 } 766 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 767 return size+3+offset_size; 768 } 769 770 // Helper for XMM registers. Extra opcode bits, limited syntax. 
// Spill/reload a float or double between an XMM register and an ESP-relative
// stack slot (double when reg_lo/reg_hi are an adjacent pair).  Returns the
// accumulated encoding size, accounting for VEX/EVEX prefix and compressed
// displacement when UseAVX > 2.
static int impl_x_helper( C2_MacroAssembler *masm, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (masm) {
    // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    __ set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // With EVEX, a non-zero displacement may still compress to one byte.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy (float or double); returns accumulated size.
static int impl_movx_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (masm) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// GPR -> XMM copy via MOVD (32-bit only); returns total encoding size.
static int impl_movgpr2x_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (masm) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// XMM -> GPR copy via MOVD (32-bit only); returns total encoding size.
static int impl_movx2gpr_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (masm) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer reg-to-reg MOV (opcode 0x8B /r); 2 bytes.
static int impl_mov_helper( C2_MacroAssembler *masm, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( masm ) {
    emit_opcode(masm, 0x8B );
    emit_rm (masm, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 register to the stack: FLD it to the top of the FP stack if
// it is not already ST(0), then FST/FSTP to [ESP + offset] via impl_helper.
static int impl_fp_store_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( masm ) {
      emit_opcode( masm, 0xD9 ); // FLD (i.e., push it)
      emit_d8( masm, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // st_op selects the /reg field of the store opcode: store-and-pop when we
  // pushed a copy above, plain store when the value was already in ST(0).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(masm,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
944 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, 945 int src_hi, int dst_hi, uint ireg, outputStream* st); 946 947 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, 948 int stack_offset, int reg, uint ireg, outputStream* st); 949 950 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset, 951 int dst_offset, uint ireg, outputStream* st) { 952 if (masm) { 953 switch (ireg) { 954 case Op_VecS: 955 __ pushl(Address(rsp, src_offset)); 956 __ popl (Address(rsp, dst_offset)); 957 break; 958 case Op_VecD: 959 __ pushl(Address(rsp, src_offset)); 960 __ popl (Address(rsp, dst_offset)); 961 __ pushl(Address(rsp, src_offset+4)); 962 __ popl (Address(rsp, dst_offset+4)); 963 break; 964 case Op_VecX: 965 __ movdqu(Address(rsp, -16), xmm0); 966 __ movdqu(xmm0, Address(rsp, src_offset)); 967 __ movdqu(Address(rsp, dst_offset), xmm0); 968 __ movdqu(xmm0, Address(rsp, -16)); 969 break; 970 case Op_VecY: 971 __ vmovdqu(Address(rsp, -32), xmm0); 972 __ vmovdqu(xmm0, Address(rsp, src_offset)); 973 __ vmovdqu(Address(rsp, dst_offset), xmm0); 974 __ vmovdqu(xmm0, Address(rsp, -32)); 975 break; 976 case Op_VecZ: 977 __ evmovdquq(Address(rsp, -64), xmm0, 2); 978 __ evmovdquq(xmm0, Address(rsp, src_offset), 2); 979 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); 980 __ evmovdquq(xmm0, Address(rsp, -64), 2); 981 break; 982 default: 983 ShouldNotReachHere(); 984 } 985 #ifndef PRODUCT 986 } else { 987 switch (ireg) { 988 case Op_VecS: 989 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 990 "popl [rsp + #%d]", 991 src_offset, dst_offset); 992 break; 993 case Op_VecD: 994 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 995 "popq [rsp + #%d]\n\t" 996 "pushl [rsp + #%d]\n\t" 997 "popq [rsp + #%d]", 998 src_offset, dst_offset, src_offset+4, dst_offset+4); 999 break; 1000 case Op_VecX: 1001 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1002 "movdqu xmm0, [rsp + #%d]\n\t" 1003 "movdqu [rsp + 
#%d], xmm0\n\t" 1004 "movdqu xmm0, [rsp - #16]", 1005 src_offset, dst_offset); 1006 break; 1007 case Op_VecY: 1008 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1009 "vmovdqu xmm0, [rsp + #%d]\n\t" 1010 "vmovdqu [rsp + #%d], xmm0\n\t" 1011 "vmovdqu xmm0, [rsp - #32]", 1012 src_offset, dst_offset); 1013 break; 1014 case Op_VecZ: 1015 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1016 "vmovdqu xmm0, [rsp + #%d]\n\t" 1017 "vmovdqu [rsp + #%d], xmm0\n\t" 1018 "vmovdqu xmm0, [rsp - #64]", 1019 src_offset, dst_offset); 1020 break; 1021 default: 1022 ShouldNotReachHere(); 1023 } 1024 #endif 1025 } 1026 } 1027 1028 uint MachSpillCopyNode::implementation( C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1029 // Get registers to move 1030 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1031 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1032 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1033 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1034 1035 enum RC src_second_rc = rc_class(src_second); 1036 enum RC src_first_rc = rc_class(src_first); 1037 enum RC dst_second_rc = rc_class(dst_second); 1038 enum RC dst_first_rc = rc_class(dst_first); 1039 1040 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1041 1042 // Generate spill code! 
1043 int size = 0; 1044 1045 if( src_first == dst_first && src_second == dst_second ) 1046 return size; // Self copy, no move 1047 1048 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) { 1049 uint ireg = ideal_reg(); 1050 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1051 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1052 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1053 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1054 // mem -> mem 1055 int src_offset = ra_->reg2offset(src_first); 1056 int dst_offset = ra_->reg2offset(dst_first); 1057 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st); 1058 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1059 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st); 1060 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1061 int stack_offset = ra_->reg2offset(dst_first); 1062 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st); 1063 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1064 int stack_offset = ra_->reg2offset(src_first); 1065 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st); 1066 } else { 1067 ShouldNotReachHere(); 1068 } 1069 return 0; 1070 } 1071 1072 // -------------------------------------- 1073 // Check for mem-mem move. push/pop to move. 
1074 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1075 if( src_second == dst_first ) { // overlapping stack copy ranges 1076 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1077 size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1078 size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1079 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1080 } 1081 // move low bits 1082 size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1083 size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1084 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1085 size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1086 size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1087 } 1088 return size; 1089 } 1090 1091 // -------------------------------------- 1092 // Check for integer reg-reg copy 1093 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1094 size = impl_mov_helper(masm,do_size,src_first,dst_first,size, st); 1095 1096 // Check for integer store 1097 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1098 size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1099 1100 // Check for integer load 1101 if( src_first_rc == rc_stack && dst_first_rc == rc_int ) 1102 size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1103 1104 // Check for integer reg-xmm reg copy 1105 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1106 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1107 "no 64 bit integer-float reg moves" ); 1108 return 
impl_movgpr2x_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st); 1109 } 1110 // -------------------------------------- 1111 // Check for float reg-reg copy 1112 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1113 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1114 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1115 if( masm ) { 1116 1117 // Note the mucking with the register encode to compensate for the 0/1 1118 // indexing issue mentioned in a comment in the reg_def sections 1119 // for FPR registers many lines above here. 1120 1121 if( src_first != FPR1L_num ) { 1122 emit_opcode (masm, 0xD9 ); // FLD ST(i) 1123 emit_d8 (masm, 0xC0+Matcher::_regEncode[src_first]-1 ); 1124 emit_opcode (masm, 0xDD ); // FSTP ST(i) 1125 emit_d8 (masm, 0xD8+Matcher::_regEncode[dst_first] ); 1126 } else { 1127 emit_opcode (masm, 0xDD ); // FST ST(i) 1128 emit_d8 (masm, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1129 } 1130 #ifndef PRODUCT 1131 } else if( !do_size ) { 1132 if( size != 0 ) st->print("\n\t"); 1133 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1134 else st->print( "FST %s", Matcher::regName[dst_first]); 1135 #endif 1136 } 1137 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1138 } 1139 1140 // Check for float store 1141 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1142 return impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1143 } 1144 1145 // Check for float load 1146 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1147 int offset = ra_->reg2offset(src_first); 1148 const char *op_str; 1149 int op; 1150 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1151 op_str = "FLD_D"; 1152 op = 0xDD; 1153 } else { // 32-bit load 1154 op_str = "FLD_S"; 1155 op = 0xD9; 1156 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1157 } 1158 if( masm ) { 1159 masm->set_inst_mark(); 1160 emit_opcode (masm, op ); 1161 encode_RegMem(masm, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1162 emit_opcode (masm, 0xDD ); // FSTP ST(i) 1163 emit_d8 (masm, 0xD8+Matcher::_regEncode[dst_first] ); 1164 masm->clear_inst_mark(); 1165 #ifndef PRODUCT 1166 } else if( !do_size ) { 1167 if( size != 0 ) st->print("\n\t"); 1168 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1169 #endif 1170 } 1171 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1172 return size + 3+offset_size+2; 1173 } 1174 1175 // Check for xmm reg-reg copy 1176 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1177 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1178 (src_first+1 == src_second && dst_first+1 == dst_second), 1179 "no non-adjacent float-moves" ); 1180 return impl_movx_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st); 1181 } 1182 1183 // Check for xmm reg-integer reg copy 1184 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1185 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1186 "no 64 bit float-integer reg moves" ); 1187 return impl_movx2gpr_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st); 1188 } 1189 1190 // Check for xmm store 1191 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1192 return impl_x_helper(masm,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st); 1193 } 1194 1195 // Check for float xmm load 1196 if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1197 return impl_x_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1198 } 1199 1200 // Copy from float reg to xmm reg 1201 if( 
src_first_rc == rc_float && dst_first_rc == rc_xmm ) { 1202 // copy to the top of stack from floating point reg 1203 // and use LEA to preserve flags 1204 if( masm ) { 1205 emit_opcode(masm,0x8D); // LEA ESP,[ESP-8] 1206 emit_rm(masm, 0x1, ESP_enc, 0x04); 1207 emit_rm(masm, 0x0, 0x04, ESP_enc); 1208 emit_d8(masm,0xF8); 1209 #ifndef PRODUCT 1210 } else if( !do_size ) { 1211 if( size != 0 ) st->print("\n\t"); 1212 st->print("LEA ESP,[ESP-8]"); 1213 #endif 1214 } 1215 size += 4; 1216 1217 size = impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1218 1219 // Copy from the temp memory to the xmm reg. 1220 size = impl_x_helper(masm,do_size,true ,0,dst_first, dst_second, size, st); 1221 1222 if( masm ) { 1223 emit_opcode(masm,0x8D); // LEA ESP,[ESP+8] 1224 emit_rm(masm, 0x1, ESP_enc, 0x04); 1225 emit_rm(masm, 0x0, 0x04, ESP_enc); 1226 emit_d8(masm,0x08); 1227 #ifndef PRODUCT 1228 } else if( !do_size ) { 1229 if( size != 0 ) st->print("\n\t"); 1230 st->print("LEA ESP,[ESP+8]"); 1231 #endif 1232 } 1233 size += 4; 1234 return size; 1235 } 1236 1237 // AVX-512 opmask specific spilling. 
1238 if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) { 1239 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1240 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1241 int offset = ra_->reg2offset(src_first); 1242 if (masm != nullptr) { 1243 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); 1244 #ifndef PRODUCT 1245 } else { 1246 st->print("KMOV %s, [ESP + %d]", Matcher::regName[dst_first], offset); 1247 #endif 1248 } 1249 return 0; 1250 } 1251 1252 if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) { 1253 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1254 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1255 int offset = ra_->reg2offset(dst_first); 1256 if (masm != nullptr) { 1257 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first])); 1258 #ifndef PRODUCT 1259 } else { 1260 st->print("KMOV [ESP + %d], %s", offset, Matcher::regName[src_first]); 1261 #endif 1262 } 1263 return 0; 1264 } 1265 1266 if (src_first_rc == rc_kreg && dst_first_rc == rc_int) { 1267 Unimplemented(); 1268 return 0; 1269 } 1270 1271 if (src_first_rc == rc_int && dst_first_rc == rc_kreg) { 1272 Unimplemented(); 1273 return 0; 1274 } 1275 1276 if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) { 1277 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1278 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1279 if (masm != nullptr) { 1280 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first])); 1281 #ifndef PRODUCT 1282 } else { 1283 st->print("KMOV %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]); 1284 #endif 1285 } 1286 return 0; 1287 } 1288 1289 assert( size > 0, "missed a case" ); 1290 1291 // 
-------------------------------------------------------------------- 1292 // Check for second bits still needing moving. 1293 if( src_second == dst_second ) 1294 return size; // Self copy; no move 1295 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1296 1297 // Check for second word int-int move 1298 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1299 return impl_mov_helper(masm,do_size,src_second,dst_second,size, st); 1300 1301 // Check for second word integer store 1302 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1303 return impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1304 1305 // Check for second word integer load 1306 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1307 return impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1308 1309 Unimplemented(); 1310 return 0; // Mute compiler 1311 } 1312 1313 #ifndef PRODUCT 1314 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1315 implementation( nullptr, ra_, false, st ); 1316 } 1317 #endif 1318 1319 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { 1320 implementation( masm, ra_, false, nullptr ); 1321 } 1322 1323 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1324 return MachNode::size(ra_); 1325 } 1326 1327 1328 //============================================================================= 1329 #ifndef PRODUCT 1330 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1331 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1332 int reg = ra_->get_reg_first(this); 1333 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1334 } 1335 #endif 1336 1337 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { 1338 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1339 int reg = 
ra_->get_encode(this); 1340 if( offset >= 128 ) { 1341 emit_opcode(masm, 0x8D); // LEA reg,[SP+offset] 1342 emit_rm(masm, 0x2, reg, 0x04); 1343 emit_rm(masm, 0x0, 0x04, ESP_enc); 1344 emit_d32(masm, offset); 1345 } 1346 else { 1347 emit_opcode(masm, 0x8D); // LEA reg,[SP+offset] 1348 emit_rm(masm, 0x1, reg, 0x04); 1349 emit_rm(masm, 0x0, 0x04, ESP_enc); 1350 emit_d8(masm, offset); 1351 } 1352 } 1353 1354 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1355 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1356 if( offset >= 128 ) { 1357 return 7; 1358 } 1359 else { 1360 return 4; 1361 } 1362 } 1363 1364 //============================================================================= 1365 #ifndef PRODUCT 1366 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1367 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1368 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1369 st->print_cr("\tNOP"); 1370 st->print_cr("\tNOP"); 1371 if( !OptoBreakpoint ) 1372 st->print_cr("\tNOP"); 1373 } 1374 #endif 1375 1376 void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { 1377 __ ic_check(CodeEntryAlignment); 1378 } 1379 1380 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1381 return MachNode::size(ra_); // too many variables; just compute it 1382 // the hard way 1383 } 1384 1385 1386 //============================================================================= 1387 1388 // Vector calling convention not supported. 1389 bool Matcher::supports_vector_calling_convention() { 1390 return false; 1391 } 1392 1393 OptoRegPair Matcher::vector_return_value(uint ideal_reg) { 1394 Unimplemented(); 1395 return OptoRegPair(0, 0); 1396 } 1397 1398 // Is this branch offset short enough that a short branch can be used? 1399 // 1400 // NOTE: If the platform does not provide any short branch variants, then 1401 // this method should return false for offset 0. 
// Decide whether a branch with the given rule and encoded size can use the
// short (rel8) displacement form at the given offset.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On 86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

// Register-pressure threshold for the integer class (overridable via the
// INTPRESSURE flag; -1 selects the default of 6).
uint Matcher::int_pressure_limit()
{
  return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
}

// Register-pressure threshold for the float class (overridable via the
// FLOATPRESSURE flag; -1 selects the default of 6).
uint Matcher::float_pressure_limit()
{
  return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    // AND with a constant whose high word is zero clears the high word.
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams. Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction. Operands specify their base encoding interface with the
// interface keyword. There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
// operand to generate a function which returns its register number when
// queried. CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
// MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried. COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding. They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode. Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block. Emit functions will live in the
  // main source block for now. In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Set instruction mark in MacroAssembler. This is used only in
  // instructions that emit bytes directly to the CodeBuffer wrapped
  // in the MacroAssembler. Should go away once all "instruct" are
  // patched to emit bytes only using methods in MacroAssembler.
  enc_class SetInstMark %{
    __ set_inst_mark();
  %}

  enc_class ClearInstMark %{
    __ clear_inst_mark();
  %}

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(masm, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(masm, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(masm, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix.
  enc_class SizePrefix %{
    emit_opcode(masm,0x66);
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
    emit_opcode(masm,$opcode$$constant);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( masm, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( masm, 0x0 ); // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    // normal case special case
    //
    // input : rax,: dividend min_int
    // reg: divisor -1
    //
    // output: rax,: quotient (= rax, idiv reg) min_int
    // rdx: remainder (= rax, irem reg) 0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80 cmp rax,80000000h
    // 0F 85 0B 00 00 00 jne normal_case
    // 33 D2 xor rdx,edx
    // 83 F9 FF cmp rcx,0FFh
    // 0F 84 03 00 00 00 je done
    // normal_case:
    // 99 cdq
    // F7 F9 idiv rax,ecx
    // done:
    //
    emit_opcode(masm,0x81); emit_d8(masm,0xF8);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x80); // cmp rax,80000000h
    emit_opcode(masm,0x0F); emit_d8(masm,0x85);
    emit_opcode(masm,0x0B); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x00); // jne normal_case
    emit_opcode(masm,0x33); emit_d8(masm,0xD2); // xor rdx,edx
    emit_opcode(masm,0x83); emit_d8(masm,0xF9); emit_d8(masm,0xFF); // cmp rcx,0FFh
    emit_opcode(masm,0x0F); emit_d8(masm,0x84);
    emit_opcode(masm,0x03); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x00); // je done
    // normal_case:
    emit_opcode(masm,0x99); // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(masm, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(masm, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(masm, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1627 // Check for 8-bit immediate, and set sign extend bit in opcode 1628 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1629 emit_opcode(masm, $primary | 0x02); } 1630 else { // If 32-bit immediate 1631 emit_opcode(masm, $primary); 1632 } 1633 // Emit r/m byte with secondary opcode, after primary opcode. 1634 emit_rm(masm, 0x3, $secondary, $dst$$reg); 1635 %} 1636 1637 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1638 // Check for 8-bit immediate, and set sign extend bit in opcode 1639 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1640 $$$emit8$imm$$constant; 1641 } 1642 else { // If 32-bit immediate 1643 // Output immediate 1644 $$$emit32$imm$$constant; 1645 } 1646 %} 1647 1648 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1649 // Emit primary opcode and set sign-extend bit 1650 // Check for 8-bit immediate, and set sign extend bit in opcode 1651 int con = (int)$imm$$constant; // Throw away top bits 1652 emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1653 // Emit r/m byte with secondary opcode, after primary opcode. 1654 emit_rm(masm, 0x3, $secondary, $dst$$reg); 1655 if ((con >= -128) && (con <= 127)) emit_d8 (masm,con); 1656 else emit_d32(masm,con); 1657 %} 1658 1659 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1660 // Emit primary opcode and set sign-extend bit 1661 // Check for 8-bit immediate, and set sign extend bit in opcode 1662 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1663 emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1664 // Emit r/m byte with tertiary opcode, after primary opcode. 
1665 emit_rm(masm, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg)); 1666 if ((con >= -128) && (con <= 127)) emit_d8 (masm,con); 1667 else emit_d32(masm,con); 1668 %} 1669 1670 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1671 emit_cc(masm, $secondary, $dst$$reg ); 1672 %} 1673 1674 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1675 int destlo = $dst$$reg; 1676 int desthi = HIGH_FROM_LOW_ENC(destlo); 1677 // bswap lo 1678 emit_opcode(masm, 0x0F); 1679 emit_cc(masm, 0xC8, destlo); 1680 // bswap hi 1681 emit_opcode(masm, 0x0F); 1682 emit_cc(masm, 0xC8, desthi); 1683 // xchg lo and hi 1684 emit_opcode(masm, 0x87); 1685 emit_rm(masm, 0x3, destlo, desthi); 1686 %} 1687 1688 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1689 emit_rm(masm, 0x3, $secondary, $div$$reg ); 1690 %} 1691 1692 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1693 $$$emit8$primary; 1694 emit_cc(masm, $secondary, $cop$$cmpcode); 1695 %} 1696 1697 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1698 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1699 emit_d8(masm, op >> 8 ); 1700 emit_d8(masm, op & 255); 1701 %} 1702 1703 // emulate a CMOV with a conditional branch around a MOV 1704 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1705 // Invert sense of branch from sense of CMOV 1706 emit_cc( masm, 0x70, ($cop$$cmpcode^1) ); 1707 emit_d8( masm, $brOffs$$constant ); 1708 %} 1709 1710 enc_class enc_PartialSubtypeCheck( ) %{ 1711 Register Redi = as_Register(EDI_enc); // result register 1712 Register Reax = as_Register(EAX_enc); // super class 1713 Register Recx = as_Register(ECX_enc); // killed 1714 Register Resi = as_Register(ESI_enc); // sub class 1715 Label miss; 1716 1717 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1718 nullptr, &miss, 1719 /*set_cond_codes:*/ true); 1720 if ($primary) { 1721 __ xorptr(Redi, Redi); 1722 } 1723 __ bind(miss); 1724 %} 1725 1726 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1727 int start = __ offset(); 
1728 if (UseSSE >= 2) { 1729 if (VerifyFPU) { 1730 __ verify_FPU(0, "must be empty in SSE2+ mode"); 1731 } 1732 } else { 1733 // External c_calling_convention expects the FPU stack to be 'clean'. 1734 // Compiled code leaves it dirty. Do cleanup now. 1735 __ empty_FPU_stack(); 1736 } 1737 if (sizeof_FFree_Float_Stack_All == -1) { 1738 sizeof_FFree_Float_Stack_All = __ offset() - start; 1739 } else { 1740 assert(__ offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1741 } 1742 %} 1743 1744 enc_class Verify_FPU_For_Leaf %{ 1745 if( VerifyFPU ) { 1746 __ verify_FPU( -3, "Returning from Runtime Leaf call"); 1747 } 1748 %} 1749 1750 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1751 // This is the instruction starting address for relocation info. 1752 __ set_inst_mark(); 1753 $$$emit8$primary; 1754 // CALL directly to the runtime 1755 emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4), 1756 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1757 __ clear_inst_mark(); 1758 __ post_call_nop(); 1759 1760 if (UseSSE >= 2) { 1761 BasicType rt = tf()->return_type(); 1762 1763 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1764 // A C runtime call where the return value is unused. In SSE2+ 1765 // mode the result needs to be removed from the FPU stack. It's 1766 // likely that this function call could be removed by the 1767 // optimizer if the C function is a pure function. 
1768 __ ffree(0); 1769 } else if (rt == T_FLOAT) { 1770 __ lea(rsp, Address(rsp, -4)); 1771 __ fstp_s(Address(rsp, 0)); 1772 __ movflt(xmm0, Address(rsp, 0)); 1773 __ lea(rsp, Address(rsp, 4)); 1774 } else if (rt == T_DOUBLE) { 1775 __ lea(rsp, Address(rsp, -8)); 1776 __ fstp_d(Address(rsp, 0)); 1777 __ movdbl(xmm0, Address(rsp, 0)); 1778 __ lea(rsp, Address(rsp, 8)); 1779 } 1780 } 1781 %} 1782 1783 enc_class pre_call_resets %{ 1784 // If method sets FPU control word restore it here 1785 debug_only(int off0 = __ offset()); 1786 if (ra_->C->in_24_bit_fp_mode()) { 1787 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 1788 } 1789 // Clear upper bits of YMM registers when current compiled code uses 1790 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1791 __ vzeroupper(); 1792 debug_only(int off1 = __ offset()); 1793 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1794 %} 1795 1796 enc_class post_call_FPU %{ 1797 // If method sets FPU control word do it here also 1798 if (Compile::current()->in_24_bit_fp_mode()) { 1799 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 1800 } 1801 %} 1802 1803 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1804 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1805 // who we intended to call. 1806 __ set_inst_mark(); 1807 $$$emit8$primary; 1808 1809 if (!_method) { 1810 emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4), 1811 runtime_call_Relocation::spec(), 1812 RELOC_IMM32); 1813 __ clear_inst_mark(); 1814 __ post_call_nop(); 1815 } else { 1816 int method_index = resolved_method_index(masm); 1817 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1818 : static_call_Relocation::spec(method_index); 1819 emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4), 1820 rspec, RELOC_DISP32); 1821 __ post_call_nop(); 1822 address mark = __ inst_mark(); 1823 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) { 1824 // Calls of the same statically bound method can share 1825 // a stub to the interpreter. 1826 __ code()->shared_stub_to_interp_for(_method, __ code()->insts()->mark_off()); 1827 __ clear_inst_mark(); 1828 } else { 1829 // Emit stubs for static call. 1830 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark); 1831 __ clear_inst_mark(); 1832 if (stub == nullptr) { 1833 ciEnv::current()->record_failure("CodeCache is full"); 1834 return; 1835 } 1836 } 1837 } 1838 %} 1839 1840 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1841 __ ic_call((address)$meth$$method, resolved_method_index(masm)); 1842 __ post_call_nop(); 1843 %} 1844 1845 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1846 int disp = in_bytes(Method::from_compiled_offset()); 1847 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1848 1849 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1850 __ set_inst_mark(); 1851 $$$emit8$primary; 1852 emit_rm(masm, 0x01, $secondary, EAX_enc ); // R/M byte 1853 emit_d8(masm, disp); // Displacement 1854 __ clear_inst_mark(); 1855 __ post_call_nop(); 1856 %} 1857 1858 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1859 $$$emit8$primary; 1860 emit_rm(masm, 0x3, $secondary, $dst$$reg); 1861 $$$emit8$shift$$constant; 1862 %} 1863 1864 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1865 // Load immediate does not have a zero or sign extended version 1866 // for 8-bit immediates 1867 emit_opcode(masm, 0xB8 + $dst$$reg); 1868 $$$emit32$src$$constant; 1869 %} 1870 1871 enc_class LdImmP (rRegI dst, immI src) %{ // 
Load Immediate 1872 // Load immediate does not have a zero or sign extended version 1873 // for 8-bit immediates 1874 emit_opcode(masm, $primary + $dst$$reg); 1875 $$$emit32$src$$constant; 1876 %} 1877 1878 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1879 // Load immediate does not have a zero or sign extended version 1880 // for 8-bit immediates 1881 int dst_enc = $dst$$reg; 1882 int src_con = $src$$constant & 0x0FFFFFFFFL; 1883 if (src_con == 0) { 1884 // xor dst, dst 1885 emit_opcode(masm, 0x33); 1886 emit_rm(masm, 0x3, dst_enc, dst_enc); 1887 } else { 1888 emit_opcode(masm, $primary + dst_enc); 1889 emit_d32(masm, src_con); 1890 } 1891 %} 1892 1893 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 1894 // Load immediate does not have a zero or sign extended version 1895 // for 8-bit immediates 1896 int dst_enc = $dst$$reg + 2; 1897 int src_con = ((julong)($src$$constant)) >> 32; 1898 if (src_con == 0) { 1899 // xor dst, dst 1900 emit_opcode(masm, 0x33); 1901 emit_rm(masm, 0x3, dst_enc, dst_enc); 1902 } else { 1903 emit_opcode(masm, $primary + dst_enc); 1904 emit_d32(masm, src_con); 1905 } 1906 %} 1907 1908 1909 // Encode a reg-reg copy. If it is useless, then empty encoding. 
1910 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 1911 encode_Copy( masm, $dst$$reg, $src$$reg ); 1912 %} 1913 1914 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 1915 encode_Copy( masm, $dst$$reg, $src$$reg ); 1916 %} 1917 1918 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 1919 emit_rm(masm, 0x3, $dst$$reg, $src$$reg); 1920 %} 1921 1922 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 1923 $$$emit8$primary; 1924 emit_rm(masm, 0x3, $dst$$reg, $src$$reg); 1925 %} 1926 1927 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 1928 $$$emit8$secondary; 1929 emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg)); 1930 %} 1931 1932 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 1933 emit_rm(masm, 0x3, $dst$$reg, $src$$reg); 1934 %} 1935 1936 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 1937 emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg)); 1938 %} 1939 1940 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 1941 emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg)); 1942 %} 1943 1944 enc_class Con32 (immI src) %{ // Con32(storeImmI) 1945 // Output immediate 1946 $$$emit32$src$$constant; 1947 %} 1948 1949 enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm 1950 // Output Float immediate bits 1951 jfloat jf = $src$$constant; 1952 int jf_as_bits = jint_cast( jf ); 1953 emit_d32(masm, jf_as_bits); 1954 %} 1955 1956 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 1957 // Output Float immediate bits 1958 jfloat jf = $src$$constant; 1959 int jf_as_bits = jint_cast( jf ); 1960 emit_d32(masm, jf_as_bits); 1961 %} 1962 1963 enc_class Con16 (immI src) %{ // Con16(storeImmI) 1964 // Output immediate 1965 $$$emit16$src$$constant; 1966 %} 1967 1968 enc_class Con_d32(immI src) %{ 1969 emit_d32(masm,$src$$constant); 1970 %} 1971 1972 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 1973 // Output immediate memory reference 1974 emit_rm(masm, 0x00, 
$t1$$reg, 0x05 ); 1975 emit_d32(masm, 0x00); 1976 %} 1977 1978 enc_class lock_prefix( ) %{ 1979 emit_opcode(masm,0xF0); // [Lock] 1980 %} 1981 1982 // Cmp-xchg long value. 1983 // Note: we need to swap rbx, and rcx before and after the 1984 // cmpxchg8 instruction because the instruction uses 1985 // rcx as the high order word of the new value to store but 1986 // our register encoding uses rbx,. 1987 enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{ 1988 1989 // XCHG rbx,ecx 1990 emit_opcode(masm,0x87); 1991 emit_opcode(masm,0xD9); 1992 // [Lock] 1993 emit_opcode(masm,0xF0); 1994 // CMPXCHG8 [Eptr] 1995 emit_opcode(masm,0x0F); 1996 emit_opcode(masm,0xC7); 1997 emit_rm( masm, 0x0, 1, $mem_ptr$$reg ); 1998 // XCHG rbx,ecx 1999 emit_opcode(masm,0x87); 2000 emit_opcode(masm,0xD9); 2001 %} 2002 2003 enc_class enc_cmpxchg(eSIRegP mem_ptr) %{ 2004 // [Lock] 2005 emit_opcode(masm,0xF0); 2006 2007 // CMPXCHG [Eptr] 2008 emit_opcode(masm,0x0F); 2009 emit_opcode(masm,0xB1); 2010 emit_rm( masm, 0x0, 1, $mem_ptr$$reg ); 2011 %} 2012 2013 enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{ 2014 // [Lock] 2015 emit_opcode(masm,0xF0); 2016 2017 // CMPXCHGB [Eptr] 2018 emit_opcode(masm,0x0F); 2019 emit_opcode(masm,0xB0); 2020 emit_rm( masm, 0x0, 1, $mem_ptr$$reg ); 2021 %} 2022 2023 enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{ 2024 // [Lock] 2025 emit_opcode(masm,0xF0); 2026 2027 // 16-bit mode 2028 emit_opcode(masm, 0x66); 2029 2030 // CMPXCHGW [Eptr] 2031 emit_opcode(masm,0x0F); 2032 emit_opcode(masm,0xB1); 2033 emit_rm( masm, 0x0, 1, $mem_ptr$$reg ); 2034 %} 2035 2036 enc_class enc_flags_ne_to_boolean( iRegI res ) %{ 2037 int res_encoding = $res$$reg; 2038 2039 // MOV res,0 2040 emit_opcode( masm, 0xB8 + res_encoding); 2041 emit_d32( masm, 0 ); 2042 // JNE,s fail 2043 emit_opcode(masm,0x75); 2044 emit_d8(masm, 5 ); 2045 // MOV res,1 2046 emit_opcode( masm, 0xB8 + res_encoding); 2047 emit_d32( masm, 1 ); 2048 // fail: 2049 %} 2050 2051 enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem 
2052 int reg_encoding = $ereg$$reg; 2053 int base = $mem$$base; 2054 int index = $mem$$index; 2055 int scale = $mem$$scale; 2056 int displace = $mem$$disp; 2057 relocInfo::relocType disp_reloc = $mem->disp_reloc(); 2058 encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc); 2059 %} 2060 2061 enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem 2062 int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg); // Hi register of pair, computed from lo 2063 int base = $mem$$base; 2064 int index = $mem$$index; 2065 int scale = $mem$$scale; 2066 int displace = $mem$$disp + 4; // Offset is 4 further in memory 2067 assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" ); 2068 encode_RegMem(masm, reg_encoding, base, index, scale, displace, relocInfo::none); 2069 %} 2070 2071 enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{ 2072 int r1, r2; 2073 if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW_ENC($dst$$reg); } 2074 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW_ENC($dst$$reg); } 2075 emit_opcode(masm,0x0F); 2076 emit_opcode(masm,$tertiary); 2077 emit_rm(masm, 0x3, r1, r2); 2078 emit_d8(masm,$cnt$$constant); 2079 emit_d8(masm,$primary); 2080 emit_rm(masm, 0x3, $secondary, r1); 2081 emit_d8(masm,$cnt$$constant); 2082 %} 2083 2084 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ 2085 emit_opcode( masm, 0x8B ); // Move 2086 emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg)); 2087 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2088 emit_d8(masm,$primary); 2089 emit_rm(masm, 0x3, $secondary, $dst$$reg); 2090 emit_d8(masm,$cnt$$constant-32); 2091 } 2092 emit_d8(masm,$primary); 2093 emit_rm(masm, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg)); 2094 emit_d8(masm,31); 2095 %} 2096 2097 enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{ 2098 int r1, r2; 2099 if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW_ENC($dst$$reg); } 2100 else { r2 = $dst$$reg; r1 = 
HIGH_FROM_LOW_ENC($dst$$reg); } 2101 2102 emit_opcode( masm, 0x8B ); // Move r1,r2 2103 emit_rm(masm, 0x3, r1, r2); 2104 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2105 emit_opcode(masm,$primary); 2106 emit_rm(masm, 0x3, $secondary, r1); 2107 emit_d8(masm,$cnt$$constant-32); 2108 } 2109 emit_opcode(masm,0x33); // XOR r2,r2 2110 emit_rm(masm, 0x3, r2, r2); 2111 %} 2112 2113 // Clone of RegMem but accepts an extra parameter to access each 2114 // half of a double in memory; it never needs relocation info. 2115 enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{ 2116 emit_opcode(masm,$opcode$$constant); 2117 int reg_encoding = $rm_reg$$reg; 2118 int base = $mem$$base; 2119 int index = $mem$$index; 2120 int scale = $mem$$scale; 2121 int displace = $mem$$disp + $disp_for_half$$constant; 2122 relocInfo::relocType disp_reloc = relocInfo::none; 2123 encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc); 2124 %} 2125 2126 // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!! 2127 // 2128 // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant 2129 // and it never needs relocation information. 2130 // Frequently used to move data between FPU's Stack Top and memory. 
2131 enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{ 2132 int rm_byte_opcode = $rm_opcode$$constant; 2133 int base = $mem$$base; 2134 int index = $mem$$index; 2135 int scale = $mem$$scale; 2136 int displace = $mem$$disp; 2137 assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" ); 2138 encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, relocInfo::none); 2139 %} 2140 2141 enc_class RMopc_Mem (immI rm_opcode, memory mem) %{ 2142 int rm_byte_opcode = $rm_opcode$$constant; 2143 int base = $mem$$base; 2144 int index = $mem$$index; 2145 int scale = $mem$$scale; 2146 int displace = $mem$$disp; 2147 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2148 encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc); 2149 %} 2150 2151 enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea 2152 int reg_encoding = $dst$$reg; 2153 int base = $src0$$reg; // 0xFFFFFFFF indicates no base 2154 int index = 0x04; // 0x04 indicates no index 2155 int scale = 0x00; // 0x00 indicates no scale 2156 int displace = $src1$$constant; // 0x00 indicates no displacement 2157 relocInfo::relocType disp_reloc = relocInfo::none; 2158 encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc); 2159 %} 2160 2161 enc_class min_enc (rRegI dst, rRegI src) %{ // MIN 2162 // Compare dst,src 2163 emit_opcode(masm,0x3B); 2164 emit_rm(masm, 0x3, $dst$$reg, $src$$reg); 2165 // jmp dst < src around move 2166 emit_opcode(masm,0x7C); 2167 emit_d8(masm,2); 2168 // move dst,src 2169 emit_opcode(masm,0x8B); 2170 emit_rm(masm, 0x3, $dst$$reg, $src$$reg); 2171 %} 2172 2173 enc_class max_enc (rRegI dst, rRegI src) %{ // MAX 2174 // Compare dst,src 2175 emit_opcode(masm,0x3B); 2176 emit_rm(masm, 0x3, $dst$$reg, $src$$reg); 2177 // jmp dst > src around move 2178 emit_opcode(masm,0x7F); 2179 emit_d8(masm,2); 2180 // move dst,src 2181 emit_opcode(masm,0x8B); 
2182 emit_rm(masm, 0x3, $dst$$reg, $src$$reg); 2183 %} 2184 2185 enc_class enc_FPR_store(memory mem, regDPR src) %{ 2186 // If src is FPR1, we can just FST to store it. 2187 // Else we need to FLD it to FPR1, then FSTP to store/pop it. 2188 int reg_encoding = 0x2; // Just store 2189 int base = $mem$$base; 2190 int index = $mem$$index; 2191 int scale = $mem$$scale; 2192 int displace = $mem$$disp; 2193 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2194 if( $src$$reg != FPR1L_enc ) { 2195 reg_encoding = 0x3; // Store & pop 2196 emit_opcode( masm, 0xD9 ); // FLD (i.e., push it) 2197 emit_d8( masm, 0xC0-1+$src$$reg ); 2198 } 2199 __ set_inst_mark(); // Mark start of opcode for reloc info in mem operand 2200 emit_opcode(masm,$primary); 2201 encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc); 2202 __ clear_inst_mark(); 2203 %} 2204 2205 enc_class neg_reg(rRegI dst) %{ 2206 // NEG $dst 2207 emit_opcode(masm,0xF7); 2208 emit_rm(masm, 0x3, 0x03, $dst$$reg ); 2209 %} 2210 2211 enc_class setLT_reg(eCXRegI dst) %{ 2212 // SETLT $dst 2213 emit_opcode(masm,0x0F); 2214 emit_opcode(masm,0x9C); 2215 emit_rm( masm, 0x3, 0x4, $dst$$reg ); 2216 %} 2217 2218 enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT 2219 int tmpReg = $tmp$$reg; 2220 2221 // SUB $p,$q 2222 emit_opcode(masm,0x2B); 2223 emit_rm(masm, 0x3, $p$$reg, $q$$reg); 2224 // SBB $tmp,$tmp 2225 emit_opcode(masm,0x1B); 2226 emit_rm(masm, 0x3, tmpReg, tmpReg); 2227 // AND $tmp,$y 2228 emit_opcode(masm,0x23); 2229 emit_rm(masm, 0x3, tmpReg, $y$$reg); 2230 // ADD $p,$tmp 2231 emit_opcode(masm,0x03); 2232 emit_rm(masm, 0x3, $p$$reg, tmpReg); 2233 %} 2234 2235 enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{ 2236 // TEST shift,32 2237 emit_opcode(masm,0xF7); 2238 emit_rm(masm, 0x3, 0, ECX_enc); 2239 emit_d32(masm,0x20); 2240 // JEQ,s small 2241 emit_opcode(masm, 0x74); 2242 emit_d8(masm, 0x04); 2243 // MOV 
$dst.hi,$dst.lo 2244 emit_opcode( masm, 0x8B ); 2245 emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg ); 2246 // CLR $dst.lo 2247 emit_opcode(masm, 0x33); 2248 emit_rm(masm, 0x3, $dst$$reg, $dst$$reg); 2249 // small: 2250 // SHLD $dst.hi,$dst.lo,$shift 2251 emit_opcode(masm,0x0F); 2252 emit_opcode(masm,0xA5); 2253 emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg)); 2254 // SHL $dst.lo,$shift" 2255 emit_opcode(masm,0xD3); 2256 emit_rm(masm, 0x3, 0x4, $dst$$reg ); 2257 %} 2258 2259 enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{ 2260 // TEST shift,32 2261 emit_opcode(masm,0xF7); 2262 emit_rm(masm, 0x3, 0, ECX_enc); 2263 emit_d32(masm,0x20); 2264 // JEQ,s small 2265 emit_opcode(masm, 0x74); 2266 emit_d8(masm, 0x04); 2267 // MOV $dst.lo,$dst.hi 2268 emit_opcode( masm, 0x8B ); 2269 emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2270 // CLR $dst.hi 2271 emit_opcode(masm, 0x33); 2272 emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg)); 2273 // small: 2274 // SHRD $dst.lo,$dst.hi,$shift 2275 emit_opcode(masm,0x0F); 2276 emit_opcode(masm,0xAD); 2277 emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg); 2278 // SHR $dst.hi,$shift" 2279 emit_opcode(masm,0xD3); 2280 emit_rm(masm, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) ); 2281 %} 2282 2283 enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ 2284 // TEST shift,32 2285 emit_opcode(masm,0xF7); 2286 emit_rm(masm, 0x3, 0, ECX_enc); 2287 emit_d32(masm,0x20); 2288 // JEQ,s small 2289 emit_opcode(masm, 0x74); 2290 emit_d8(masm, 0x05); 2291 // MOV $dst.lo,$dst.hi 2292 emit_opcode( masm, 0x8B ); 2293 emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2294 // SAR $dst.hi,31 2295 emit_opcode(masm, 0xC1); 2296 emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) ); 2297 emit_d8(masm, 0x1F ); 2298 // small: 2299 // SHRD $dst.lo,$dst.hi,$shift 2300 emit_opcode(masm,0x0F); 2301 emit_opcode(masm,0xAD); 2302 emit_rm(masm, 0x3, 
HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg); 2303 // SAR $dst.hi,$shift" 2304 emit_opcode(masm,0xD3); 2305 emit_rm(masm, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) ); 2306 %} 2307 2308 2309 // ----------------- Encodings for floating point unit ----------------- 2310 // May leave result in FPU-TOS or FPU reg depending on opcodes 2311 enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV 2312 $$$emit8$primary; 2313 emit_rm(masm, 0x3, $secondary, $src$$reg ); 2314 %} 2315 2316 // Pop argument in FPR0 with FSTP ST(0) 2317 enc_class PopFPU() %{ 2318 emit_opcode( masm, 0xDD ); 2319 emit_d8( masm, 0xD8 ); 2320 %} 2321 2322 // !!!!! equivalent to Pop_Reg_F 2323 enc_class Pop_Reg_DPR( regDPR dst ) %{ 2324 emit_opcode( masm, 0xDD ); // FSTP ST(i) 2325 emit_d8( masm, 0xD8+$dst$$reg ); 2326 %} 2327 2328 enc_class Push_Reg_DPR( regDPR dst ) %{ 2329 emit_opcode( masm, 0xD9 ); 2330 emit_d8( masm, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 2331 %} 2332 2333 enc_class strictfp_bias1( regDPR dst ) %{ 2334 emit_opcode( masm, 0xDB ); // FLD m80real 2335 emit_opcode( masm, 0x2D ); 2336 emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() ); 2337 emit_opcode( masm, 0xDE ); // FMULP ST(dst), ST0 2338 emit_opcode( masm, 0xC8+$dst$$reg ); 2339 %} 2340 2341 enc_class strictfp_bias2( regDPR dst ) %{ 2342 emit_opcode( masm, 0xDB ); // FLD m80real 2343 emit_opcode( masm, 0x2D ); 2344 emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() ); 2345 emit_opcode( masm, 0xDE ); // FMULP ST(dst), ST0 2346 emit_opcode( masm, 0xC8+$dst$$reg ); 2347 %} 2348 2349 // Special case for moving an integer register to a stack slot. 2350 enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2351 store_to_stackslot( masm, $primary, $src$$reg, $dst$$disp ); 2352 %} 2353 2354 // Special case for moving a register to a stack slot. 
2355 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2356 // Opcode already emitted 2357 emit_rm( masm, 0x02, $src$$reg, ESP_enc ); // R/M byte 2358 emit_rm( masm, 0x00, ESP_enc, ESP_enc); // SIB byte 2359 emit_d32(masm, $dst$$disp); // Displacement 2360 %} 2361 2362 // Push the integer in stackSlot 'src' onto FP-stack 2363 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2364 store_to_stackslot( masm, $primary, $secondary, $src$$disp ); 2365 %} 2366 2367 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2368 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2369 store_to_stackslot( masm, 0xD9, 0x03, $dst$$disp ); 2370 %} 2371 2372 // Same as Pop_Mem_F except for opcode 2373 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2374 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2375 store_to_stackslot( masm, 0xDD, 0x03, $dst$$disp ); 2376 %} 2377 2378 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2379 emit_opcode( masm, 0xDD ); // FSTP ST(i) 2380 emit_d8( masm, 0xD8+$dst$$reg ); 2381 %} 2382 2383 enc_class Push_Reg_FPR( regFPR dst ) %{ 2384 emit_opcode( masm, 0xD9 ); // FLD ST(i-1) 2385 emit_d8( masm, 0xC0-1+$dst$$reg ); 2386 %} 2387 2388 // Push FPU's float to a stack-slot, and pop FPU-stack 2389 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2390 int pop = 0x02; 2391 if ($src$$reg != FPR1L_enc) { 2392 emit_opcode( masm, 0xD9 ); // FLD ST(i-1) 2393 emit_d8( masm, 0xC0-1+$src$$reg ); 2394 pop = 0x03; 2395 } 2396 store_to_stackslot( masm, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2397 %} 2398 2399 // Push FPU's double to a stack-slot, and pop FPU-stack 2400 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2401 int pop = 0x02; 2402 if ($src$$reg != FPR1L_enc) { 2403 emit_opcode( masm, 0xD9 ); // FLD ST(i-1) 2404 emit_d8( masm, 0xC0-1+$src$$reg ); 2405 pop = 0x03; 2406 } 2407 store_to_stackslot( masm, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2408 %} 2409 2410 // Push FPU's double to 
a FPU-stack-slot, and pop FPU-stack 2411 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 2412 int pop = 0xD0 - 1; // -1 since we skip FLD 2413 if ($src$$reg != FPR1L_enc) { 2414 emit_opcode( masm, 0xD9 ); // FLD ST(src-1) 2415 emit_d8( masm, 0xC0-1+$src$$reg ); 2416 pop = 0xD8; 2417 } 2418 emit_opcode( masm, 0xDD ); 2419 emit_d8( masm, pop+$dst$$reg ); // FST<P> ST(i) 2420 %} 2421 2422 2423 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 2424 // load dst in FPR0 2425 emit_opcode( masm, 0xD9 ); 2426 emit_d8( masm, 0xC0-1+$dst$$reg ); 2427 if ($src$$reg != FPR1L_enc) { 2428 // fincstp 2429 emit_opcode (masm, 0xD9); 2430 emit_opcode (masm, 0xF7); 2431 // swap src with FPR1: 2432 // FXCH FPR1 with src 2433 emit_opcode(masm, 0xD9); 2434 emit_d8(masm, 0xC8-1+$src$$reg ); 2435 // fdecstp 2436 emit_opcode (masm, 0xD9); 2437 emit_opcode (masm, 0xF6); 2438 } 2439 %} 2440 2441 enc_class Push_ModD_encoding(regD src0, regD src1) %{ 2442 __ subptr(rsp, 8); 2443 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2444 __ fld_d(Address(rsp, 0)); 2445 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2446 __ fld_d(Address(rsp, 0)); 2447 %} 2448 2449 enc_class Push_ModF_encoding(regF src0, regF src1) %{ 2450 __ subptr(rsp, 4); 2451 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2452 __ fld_s(Address(rsp, 0)); 2453 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2454 __ fld_s(Address(rsp, 0)); 2455 %} 2456 2457 enc_class Push_ResultD(regD dst) %{ 2458 __ fstp_d(Address(rsp, 0)); 2459 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2460 __ addptr(rsp, 8); 2461 %} 2462 2463 enc_class Push_ResultF(regF dst, immI d8) %{ 2464 __ fstp_s(Address(rsp, 0)); 2465 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2466 __ addptr(rsp, $d8$$constant); 2467 %} 2468 2469 enc_class Push_SrcD(regD src) %{ 2470 __ subptr(rsp, 8); 2471 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2472 __ fld_d(Address(rsp, 0)); 2473 %} 2474 2475 enc_class push_stack_temp_qword() %{ 2476 __ subptr(rsp, 8); 2477 %} 

  // ------------------------------------------------------------------------
  // NOTE(review): the enc_class bodies below emit raw x86 opcode bytes
  // directly via emit_opcode()/emit_rm()/emit_d8()/emit_d16()/emit_d32();
  // the inline comments name the instruction each byte sequence encodes.
  // Byte order is the behavior here — do not reorder statements.
  // ------------------------------------------------------------------------

  // Discard an 8-byte temporary from the stack (rsp += 8).
  enc_class pop_stack_temp_qword() %{
    __ addptr(rsp, 8);
  %}

  // Spill the XMM double to [rsp+0] and reload it onto the x87 stack top.
  enc_class push_xmm_to_fpr1(regD src) %{
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // If src is not already FPR1, rotate the x87 stack (fincstp), exchange
  // with src (fxch), then rotate back (fdecstp).
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (masm, 0xD9);
      emit_opcode (masm, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(masm, 0xD9);
      emit_d8(masm, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (masm, 0xD9);
      emit_opcode (masm, 0xF6);
    }
  %}

  // Copy FPU status to EFLAGS (fnstsw ax; sahf), then skip 5 bytes when the
  // parity flag is clear (jnp +5, i.e. no unordered comparison).
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( masm, 0xDF );
    emit_opcode( masm, 0xE0 );
    // sahf
    emit_opcode( masm, 0x9E );
    // jnp  ::skip
    emit_opcode( masm, 0x7B );
    emit_opcode( masm, 0x05 );
  %}

  // Loop of fprem until the C2 condition bit (reflected as parity after
  // sahf) clears; fprem only reduces partially per iteration.
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( masm, 0xD9 );
    emit_opcode( masm, 0xF8 );
    // wait
    emit_opcode( masm, 0x9b );
    // fnstsw ax
    emit_opcode( masm, 0xDF );
    emit_opcode( masm, 0xE0 );
    // sahf
    emit_opcode( masm, 0x9E );
    // jp ::loop   (rel32 = -12, back to the fprem)
    emit_opcode( masm, 0x0F );
    emit_opcode( masm, 0x8A );
    emit_opcode( masm, 0xF4 );
    emit_opcode( masm, 0xFF );
    emit_opcode( masm, 0xFF );
    emit_opcode( masm, 0xFF );
  %}

  // Transfer FPU flags into EFLAGS, forcing the carry flag (LT result) when
  // the unordered bit (0x0400) was set.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( masm, 0xDF);
    emit_opcode( masm, 0xE0);
    // test ax,0x0400
    emit_opcode( masm, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( masm, 0xA9 );
    emit_d16   ( masm, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( masm, 0xA9 );
    // emit_d32  ( masm, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( masm, 0x74 );
    emit_d8    ( masm, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( masm, 0xB4 );
    emit_d8    ( masm, 0x01 );
    // sahf
    emit_opcode( masm, 0x9E);
  %}

  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( masm, 0x7B );
    emit_d8    ( masm, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( masm, 0xB4 );
    emit_d8    ( masm, 0x01 );
    // SAHF
    emit_opcode( masm, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( masm, 0x90);
  %}

  // fnstsw_ax();
  // sahf();
  // movl(dst, nan_result);
  // jcc(Assembler::parity, exit);
  // movl(dst, less_result);
  // jcc(Assembler::below, exit);
  // movl(dst, equal_result);
  // jcc(Assembler::equal, exit);
  // movl(dst, greater_result);

  // less_result     =  1;
  // greater_result  = -1;
  // equal_result    =  0;
  // nan_result      = -1;

  // Materialize a three-way float-compare result in dst (-1/0/1; NaN -> -1),
  // following the pseudo-code sketched in the comment block above.
  // 0xB8+reg encodes MOV r32,imm32; the jcc displacements (0x13/0x0C/0x05)
  // all land just past the final MOV.
  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( masm, 0xDF);
    emit_opcode( masm, 0xE0);
    // sahf
    emit_opcode( masm, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( masm, 0xB8 + $dst$$reg);
    emit_d32( masm, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( masm, 0x7A );
    emit_d8    ( masm, 0x13 );
    // movl(dst, less_result);
    emit_opcode( masm, 0xB8 + $dst$$reg);
    emit_d32( masm, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( masm, 0x72 );
    emit_d8    ( masm, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( masm, 0xB8 + $dst$$reg);
    emit_d32( masm, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( masm, 0x74 );
    emit_d8    ( masm, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( masm, 0xB8 + $dst$$reg);
    emit_d32( masm, 1 );
  %}


  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
    // JNE,s  done
    emit_opcode(masm,0x75);
    emit_d8(masm, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}

  // Sign-extend a 32-bit int into a long register pair: copy to both halves,
  // then arithmetic-shift the high half right by 31.
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( masm, dst_encoding , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( masm, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( masm, 0xC1 );
    emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
    emit_d8(masm, 0x1F );
  %}

  // Push the 64-bit pair, FILD it onto the x87 stack, then pop the 8 bytes.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(masm, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(masm,0xdf);
    emit_d8(masm, 0x6C);
    emit_d8(masm, 0x24);
    emit_d8(masm, 0x00);
    // pop stack
    emit_opcode(masm, 0x83); // add SP, #8
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, 0x8);
  %}

  // IMUL EDX:EAX by src1, then SAR the high word right by (cnt-32); the
  // shift is skipped entirely when cnt == 32 (shift_count == 0).
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL EDX:EAX,$src1
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x5, $src1$$reg );
    // SAR EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(masm, 0xC1);
      emit_rm(masm, 0x3, 7, $dst$$reg );
      emit_d8(masm, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(masm, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(masm,0xdf);
    emit_d8(masm, 0x6C);
    emit_d8(masm, 0x24);
    emit_d8(masm, 0x00);
  %}

  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x5, $src$$reg);
  %}

  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x4, $src$$reg);
  %}

  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( masm, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( masm, 0x0F );
    emit_opcode( masm, 0xAF );
    emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( masm, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( masm, 0x0F );
    emit_opcode( masm, 0xAF );
    emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( masm, 0x03 );
    emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // MUL    EDX:EAX,$src.lo
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x4, $src$$reg );
    // ADD    EDX,ESI    (i.e. dst.hi += tmp)
    emit_opcode( masm, 0x03 );
    emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
  %}

  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( masm, 0x6B );
    emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    emit_d8( masm, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(masm, 0xB8 + EDX_enc);
    emit_d32( masm, (int)$src$$constant );
    // MUL    EDX:EAX,EDX
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x4, EDX_enc );
    // ADD    EDX,ESI    (i.e. dst.hi += tmp)
    emit_opcode( masm, 0x03 );
    emit_rm( masm, 0x3, EDX_enc, $tmp$$reg );
  %}

  // Push both long operands (4 words) and call SharedRuntime::ldiv, then
  // restore the 16 bytes of argument space.
  // NOTE(review): HIGH_FROM_LOW_ENC is applied to (0x50+reg) rather than to
  // reg alone — this is only equivalent if the macro is purely additive;
  // the macro definition is outside this chunk, so confirm before touching.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(masm, 0x50+$src1$$reg );
    // PUSH src2.hi
    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(masm, 0x50+$src2$$reg );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);       // Call into runtime
    emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Restore stack
    emit_opcode(masm, 0x83); // add SP, #framesize
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, 4*4);
  %}

  // Same calling sequence as long_div above, but targets SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(masm, 0x50+$src1$$reg );
    // PUSH src2.hi
    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(masm, 0x50+$src2$$reg );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);       // Call into runtime
    emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Restore stack
    emit_opcode(masm, 0x83); // add SP, #framesize
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, 4*4);
  %}

  // Set ZF iff the whole long is zero: tmp = src.lo; tmp |= src.hi.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(masm, 0x8B);
    emit_rm(masm, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(masm, 0x0B);
    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
  %}

  // Long equality compare: compare low halves, skip the high-half compare
  // (2 bytes) when they already differ.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(masm, 0x70, 0x5);
    emit_d8(masm,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
  %}

  // Signed long compare via CMP low / SBB high, leaving ordering flags set.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
    // SBB    $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( masm, 0x1B );
    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
  %}

  // Long compare against zero: 0 - src via XOR/CMP/SBB sets ordering flags.
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(masm,0x33);  // XOR
    emit_rm(masm,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( masm, 0x1B );
    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
  %}

  // Sniff, sniff... smells like Gnu Superoptimizer
  // Two's-complement negate of a long pair: NEG hi; NEG lo; SBB hi,0.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(masm,0xF7);    // NEG hi
    emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_opcode(masm,0xF7);    // NEG lo
    emit_rm    (masm,0x3, 0x3, $dst$$reg );
    emit_opcode(masm,0x83);    // SBB hi,0
    emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_d8    (masm,0 );
  %}

  // POP EDX (single-byte encoding 0x5A).
  enc_class enc_pop_rdx() %{
    emit_opcode(masm,0x5A);
  %}

  // Tail-jump to the rethrow stub (JMP rel32 with a runtime-call reloc).
  enc_class enc_rethrow() %{
    __ set_inst_mark();
    emit_opcode(masm, 0xE9);        // jmp    entry
    emit_d32_reloc(masm, (int)OptoRuntime::rethrow_stub() - ((int)__ pc())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // thrown an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(masm,0xD9);            // FLDCW  trunc
    emit_opcode(masm,0x2D);
    emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(masm,0x83);            // SUB ESP,4
    emit_opcode(masm,0xEC);
    emit_d8(masm,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(masm,0xDB);            // FISTP [ESP]
    emit_opcode(masm,0x1C);
    emit_d8(masm,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(masm,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(masm,0x2D);
    emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(masm,0x58);            // POP EAX
    emit_opcode(masm,0x3D);            // CMP EAX,imm
    emit_d32   (masm,0x80000000);      //         0x80000000
    emit_opcode(masm,0x75);            // JNE around_slow_call
    emit_d8    (masm,0x07);            // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(masm,0xD9 );           // FLD     ST(i)
    emit_d8    (masm,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);            // Call into runtime
    emit_d32_reloc(masm, (StubRoutines::x86::d2i_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Carry on here...
  %}

  // Double -> long: same trunc-mode FISTP trick as DPR2I_encoding, but with
  // an 8-byte slot; the 0x80000000:00000000 sentinel (EDX:EAX) routes to the
  // d2l_wrapper slow path.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(masm,0xD9);            // FLDCW  trunc
    emit_opcode(masm,0x2D);
    emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(masm,0x83);            // SUB ESP,8
    emit_opcode(masm,0xEC);
    emit_d8(masm,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(masm,0xDF);            // FISTP [ESP]
    emit_opcode(masm,0x3C);
    emit_d8(masm,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(masm,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(masm,0x2D);
    emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(masm,0x58);            // POP EAX
    emit_opcode(masm,0x5A);            // POP EDX
    emit_opcode(masm,0x81);            // CMP EDX,imm
    emit_d8    (masm,0xFA);            // rdx
    emit_d32   (masm,0x80000000);      //         0x80000000
    emit_opcode(masm,0x75);            // JNE around_slow_call
    emit_d8    (masm,0x07+4);          // Size of slow_call
    emit_opcode(masm,0x85);            // TEST EAX,EAX
    emit_opcode(masm,0xC0);            // 2/rax,/rax,
    emit_opcode(masm,0x75);            // JNE around_slow_call
    emit_d8    (masm,0x07);            // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(masm,0xD9 );           // FLD     ST(i)
    emit_d8    (masm,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);            // Call into runtime
    emit_d32_reloc(masm, (StubRoutines::x86::d2l_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Carry on here...
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADDP  ST,src2  /* D8 C0+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(masm, 0xDE);
    emit_opcode(masm, 0xC0 + $src2$$reg);
  %}

  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xF0 + $src2$$reg);
  %}

  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC8 + $src2$$reg);
  %}


  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(masm, 0xDE);
    emit_opcode(masm, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  // FILD the 64-bit memory operand onto the x87 stack, then store it to the
  // destination stack slot — a single 64-bit load, hence atomic.
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(masm,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( masm, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( masm, 0x0DF, 0x05, $src$$disp );
    __ set_inst_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(masm,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    __ clear_inst_mark();
  %}

%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t     -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        |  locks |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by      +--------+
//     CALLEE      | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // Note the asymmetry with c_return_value: here float results move to XMM0
  // already at UseSSE>=1, while doubles still require UseSSE>=2.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immU8() %{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Null Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// "Not EAX or EDX" integer registers (EBX/ECX/ESI/EDI) — used where the
// instruction implicitly clobbers EAX:EDX (e.g. the multiply encodings above).
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// "Not ECX" integer registers — ECX is reserved (e.g. as the shift-count
// register); allocator may use EAX/EDX/ESI/EDI here.
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand rRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
3703 match(eAXRegP); 3704 match(eBXRegP); 3705 match(eCXRegP); 3706 match(eDIRegP); 3707 3708 format %{ %} 3709 interface(REG_INTER); 3710 %} 3711 3712 // On windows95, EBP is not safe to use for implicit null tests. 3713 operand eRegP_no_EBP() %{ 3714 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3715 match(RegP); 3716 match(eAXRegP); 3717 match(eBXRegP); 3718 match(eCXRegP); 3719 match(eDIRegP); 3720 3721 op_cost(100); 3722 format %{ %} 3723 interface(REG_INTER); 3724 %} 3725 3726 operand pRegP() %{ 3727 constraint(ALLOC_IN_RC(p_reg)); 3728 match(RegP); 3729 match(eBXRegP); 3730 match(eDXRegP); 3731 match(eSIRegP); 3732 match(eDIRegP); 3733 3734 format %{ %} 3735 interface(REG_INTER); 3736 %} 3737 3738 // Special Registers 3739 // Return a pointer value 3740 operand eAXRegP(eRegP reg) %{ 3741 constraint(ALLOC_IN_RC(eax_reg)); 3742 match(reg); 3743 format %{ "EAX" %} 3744 interface(REG_INTER); 3745 %} 3746 3747 // Used in AtomicAdd 3748 operand eBXRegP(eRegP reg) %{ 3749 constraint(ALLOC_IN_RC(ebx_reg)); 3750 match(reg); 3751 format %{ "EBX" %} 3752 interface(REG_INTER); 3753 %} 3754 3755 // Tail-call (interprocedural jump) to interpreter 3756 operand eCXRegP(eRegP reg) %{ 3757 constraint(ALLOC_IN_RC(ecx_reg)); 3758 match(reg); 3759 format %{ "ECX" %} 3760 interface(REG_INTER); 3761 %} 3762 3763 operand eDXRegP(eRegP reg) %{ 3764 constraint(ALLOC_IN_RC(edx_reg)); 3765 match(reg); 3766 format %{ "EDX" %} 3767 interface(REG_INTER); 3768 %} 3769 3770 operand eSIRegP(eRegP reg) %{ 3771 constraint(ALLOC_IN_RC(esi_reg)); 3772 match(reg); 3773 format %{ "ESI" %} 3774 interface(REG_INTER); 3775 %} 3776 3777 // Used in rep stosw 3778 operand eDIRegP(eRegP reg) %{ 3779 constraint(ALLOC_IN_RC(edi_reg)); 3780 match(reg); 3781 format %{ "EDI" %} 3782 interface(REG_INTER); 3783 %} 3784 3785 operand eRegL() %{ 3786 constraint(ALLOC_IN_RC(long_reg)); 3787 match(RegL); 3788 match(eADXRegL); 3789 3790 format %{ %} 3791 interface(REG_INTER); 3792 %} 3793 3794 operand eADXRegL( eRegL reg 
) %{ 3795 constraint(ALLOC_IN_RC(eadx_reg)); 3796 match(reg); 3797 3798 format %{ "EDX:EAX" %} 3799 interface(REG_INTER); 3800 %} 3801 3802 operand eBCXRegL( eRegL reg ) %{ 3803 constraint(ALLOC_IN_RC(ebcx_reg)); 3804 match(reg); 3805 3806 format %{ "EBX:ECX" %} 3807 interface(REG_INTER); 3808 %} 3809 3810 operand eBDPRegL( eRegL reg ) %{ 3811 constraint(ALLOC_IN_RC(ebpd_reg)); 3812 match(reg); 3813 3814 format %{ "EBP:EDI" %} 3815 interface(REG_INTER); 3816 %} 3817 // Special case for integer high multiply 3818 operand eADXRegL_low_only() %{ 3819 constraint(ALLOC_IN_RC(eadx_reg)); 3820 match(RegL); 3821 3822 format %{ "EAX" %} 3823 interface(REG_INTER); 3824 %} 3825 3826 // Flags register, used as output of compare instructions 3827 operand rFlagsReg() %{ 3828 constraint(ALLOC_IN_RC(int_flags)); 3829 match(RegFlags); 3830 3831 format %{ "EFLAGS" %} 3832 interface(REG_INTER); 3833 %} 3834 3835 // Flags register, used as output of compare instructions 3836 operand eFlagsReg() %{ 3837 constraint(ALLOC_IN_RC(int_flags)); 3838 match(RegFlags); 3839 3840 format %{ "EFLAGS" %} 3841 interface(REG_INTER); 3842 %} 3843 3844 // Flags register, used as output of FLOATING POINT compare instructions 3845 operand eFlagsRegU() %{ 3846 constraint(ALLOC_IN_RC(int_flags)); 3847 match(RegFlags); 3848 3849 format %{ "EFLAGS_U" %} 3850 interface(REG_INTER); 3851 %} 3852 3853 operand eFlagsRegUCF() %{ 3854 constraint(ALLOC_IN_RC(int_flags)); 3855 match(RegFlags); 3856 predicate(false); 3857 3858 format %{ "EFLAGS_U_CF" %} 3859 interface(REG_INTER); 3860 %} 3861 3862 // Condition Code Register used by long compare 3863 operand flagsReg_long_LTGE() %{ 3864 constraint(ALLOC_IN_RC(int_flags)); 3865 match(RegFlags); 3866 format %{ "FLAGS_LTGE" %} 3867 interface(REG_INTER); 3868 %} 3869 operand flagsReg_long_EQNE() %{ 3870 constraint(ALLOC_IN_RC(int_flags)); 3871 match(RegFlags); 3872 format %{ "FLAGS_EQNE" %} 3873 interface(REG_INTER); 3874 %} 3875 operand flagsReg_long_LEGT() %{ 3876 
constraint(ALLOC_IN_RC(int_flags)); 3877 match(RegFlags); 3878 format %{ "FLAGS_LEGT" %} 3879 interface(REG_INTER); 3880 %} 3881 3882 // Condition Code Register used by unsigned long compare 3883 operand flagsReg_ulong_LTGE() %{ 3884 constraint(ALLOC_IN_RC(int_flags)); 3885 match(RegFlags); 3886 format %{ "FLAGS_U_LTGE" %} 3887 interface(REG_INTER); 3888 %} 3889 operand flagsReg_ulong_EQNE() %{ 3890 constraint(ALLOC_IN_RC(int_flags)); 3891 match(RegFlags); 3892 format %{ "FLAGS_U_EQNE" %} 3893 interface(REG_INTER); 3894 %} 3895 operand flagsReg_ulong_LEGT() %{ 3896 constraint(ALLOC_IN_RC(int_flags)); 3897 match(RegFlags); 3898 format %{ "FLAGS_U_LEGT" %} 3899 interface(REG_INTER); 3900 %} 3901 3902 // Float register operands 3903 operand regDPR() %{ 3904 predicate( UseSSE < 2 ); 3905 constraint(ALLOC_IN_RC(fp_dbl_reg)); 3906 match(RegD); 3907 match(regDPR1); 3908 match(regDPR2); 3909 format %{ %} 3910 interface(REG_INTER); 3911 %} 3912 3913 operand regDPR1(regDPR reg) %{ 3914 predicate( UseSSE < 2 ); 3915 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 3916 match(reg); 3917 format %{ "FPR1" %} 3918 interface(REG_INTER); 3919 %} 3920 3921 operand regDPR2(regDPR reg) %{ 3922 predicate( UseSSE < 2 ); 3923 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 3924 match(reg); 3925 format %{ "FPR2" %} 3926 interface(REG_INTER); 3927 %} 3928 3929 operand regnotDPR1(regDPR reg) %{ 3930 predicate( UseSSE < 2 ); 3931 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 3932 match(reg); 3933 format %{ %} 3934 interface(REG_INTER); 3935 %} 3936 3937 // Float register operands 3938 operand regFPR() %{ 3939 predicate( UseSSE < 2 ); 3940 constraint(ALLOC_IN_RC(fp_flt_reg)); 3941 match(RegF); 3942 match(regFPR1); 3943 format %{ %} 3944 interface(REG_INTER); 3945 %} 3946 3947 // Float register operands 3948 operand regFPR1(regFPR reg) %{ 3949 predicate( UseSSE < 2 ); 3950 constraint(ALLOC_IN_RC(fp_flt_reg0)); 3951 match(reg); 3952 format %{ "FPR1" %} 3953 interface(REG_INTER); 3954 %} 3955 3956 // XMM Float register 
operands 3957 operand regF() %{ 3958 predicate( UseSSE>=1 ); 3959 constraint(ALLOC_IN_RC(float_reg_legacy)); 3960 match(RegF); 3961 format %{ %} 3962 interface(REG_INTER); 3963 %} 3964 3965 operand legRegF() %{ 3966 predicate( UseSSE>=1 ); 3967 constraint(ALLOC_IN_RC(float_reg_legacy)); 3968 match(RegF); 3969 format %{ %} 3970 interface(REG_INTER); 3971 %} 3972 3973 // Float register operands 3974 operand vlRegF() %{ 3975 constraint(ALLOC_IN_RC(float_reg_vl)); 3976 match(RegF); 3977 3978 format %{ %} 3979 interface(REG_INTER); 3980 %} 3981 3982 // XMM Double register operands 3983 operand regD() %{ 3984 predicate( UseSSE>=2 ); 3985 constraint(ALLOC_IN_RC(double_reg_legacy)); 3986 match(RegD); 3987 format %{ %} 3988 interface(REG_INTER); 3989 %} 3990 3991 // Double register operands 3992 operand legRegD() %{ 3993 predicate( UseSSE>=2 ); 3994 constraint(ALLOC_IN_RC(double_reg_legacy)); 3995 match(RegD); 3996 format %{ %} 3997 interface(REG_INTER); 3998 %} 3999 4000 operand vlRegD() %{ 4001 constraint(ALLOC_IN_RC(double_reg_vl)); 4002 match(RegD); 4003 4004 format %{ %} 4005 interface(REG_INTER); 4006 %} 4007 4008 //----------Memory Operands---------------------------------------------------- 4009 // Direct Memory Operand 4010 operand direct(immP addr) %{ 4011 match(addr); 4012 4013 format %{ "[$addr]" %} 4014 interface(MEMORY_INTER) %{ 4015 base(0xFFFFFFFF); 4016 index(0x4); 4017 scale(0x0); 4018 disp($addr); 4019 %} 4020 %} 4021 4022 // Indirect Memory Operand 4023 operand indirect(eRegP reg) %{ 4024 constraint(ALLOC_IN_RC(int_reg)); 4025 match(reg); 4026 4027 format %{ "[$reg]" %} 4028 interface(MEMORY_INTER) %{ 4029 base($reg); 4030 index(0x4); 4031 scale(0x0); 4032 disp(0x0); 4033 %} 4034 %} 4035 4036 // Indirect Memory Plus Short Offset Operand 4037 operand indOffset8(eRegP reg, immI8 off) %{ 4038 match(AddP reg off); 4039 4040 format %{ "[$reg + $off]" %} 4041 interface(MEMORY_INTER) %{ 4042 base($reg); 4043 index(0x4); 4044 scale(0x0); 4045 disp($off); 4046 %} 
4047 %} 4048 4049 // Indirect Memory Plus Long Offset Operand 4050 operand indOffset32(eRegP reg, immI off) %{ 4051 match(AddP reg off); 4052 4053 format %{ "[$reg + $off]" %} 4054 interface(MEMORY_INTER) %{ 4055 base($reg); 4056 index(0x4); 4057 scale(0x0); 4058 disp($off); 4059 %} 4060 %} 4061 4062 // Indirect Memory Plus Long Offset Operand 4063 operand indOffset32X(rRegI reg, immP off) %{ 4064 match(AddP off reg); 4065 4066 format %{ "[$reg + $off]" %} 4067 interface(MEMORY_INTER) %{ 4068 base($reg); 4069 index(0x4); 4070 scale(0x0); 4071 disp($off); 4072 %} 4073 %} 4074 4075 // Indirect Memory Plus Index Register Plus Offset Operand 4076 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4077 match(AddP (AddP reg ireg) off); 4078 4079 op_cost(10); 4080 format %{"[$reg + $off + $ireg]" %} 4081 interface(MEMORY_INTER) %{ 4082 base($reg); 4083 index($ireg); 4084 scale(0x0); 4085 disp($off); 4086 %} 4087 %} 4088 4089 // Indirect Memory Plus Index Register Plus Offset Operand 4090 operand indIndex(eRegP reg, rRegI ireg) %{ 4091 match(AddP reg ireg); 4092 4093 op_cost(10); 4094 format %{"[$reg + $ireg]" %} 4095 interface(MEMORY_INTER) %{ 4096 base($reg); 4097 index($ireg); 4098 scale(0x0); 4099 disp(0x0); 4100 %} 4101 %} 4102 4103 // // ------------------------------------------------------------------------- 4104 // // 486 architecture doesn't support "scale * index + offset" with out a base 4105 // // ------------------------------------------------------------------------- 4106 // // Scaled Memory Operands 4107 // // Indirect Memory Times Scale Plus Offset Operand 4108 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4109 // match(AddP off (LShiftI ireg scale)); 4110 // 4111 // op_cost(10); 4112 // format %{"[$off + $ireg << $scale]" %} 4113 // interface(MEMORY_INTER) %{ 4114 // base(0x4); 4115 // index($ireg); 4116 // scale($scale); 4117 // disp($off); 4118 // %} 4119 // %} 4120 4121 // Indirect Memory Times Scale Plus Index Register 4122 
// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.
// load-long support
// Restricted to ESI so the address registers cannot overlap the EDX:EAX-style
// long destination (see the Load Long Memory Operands note above this section).
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);  // high cost discourages the allocator from choosing this form
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);   // 0x4 encodes "no index register" (as in the stack-slot operands)
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);   // No Index
    scale(0x0);
    disp($off);
  %}
%}

// Memory operand class accepted by the load-long instructions.
opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
4200 operand stackSlotP(sRegP reg) %{ 4201 constraint(ALLOC_IN_RC(stack_slots)); 4202 // No match rule because this operand is only generated in matching 4203 format %{ "[$reg]" %} 4204 interface(MEMORY_INTER) %{ 4205 base(0x4); // ESP 4206 index(0x4); // No Index 4207 scale(0x0); // No Scale 4208 disp($reg); // Stack Offset 4209 %} 4210 %} 4211 4212 operand stackSlotI(sRegI reg) %{ 4213 constraint(ALLOC_IN_RC(stack_slots)); 4214 // No match rule because this operand is only generated in matching 4215 format %{ "[$reg]" %} 4216 interface(MEMORY_INTER) %{ 4217 base(0x4); // ESP 4218 index(0x4); // No Index 4219 scale(0x0); // No Scale 4220 disp($reg); // Stack Offset 4221 %} 4222 %} 4223 4224 operand stackSlotF(sRegF reg) %{ 4225 constraint(ALLOC_IN_RC(stack_slots)); 4226 // No match rule because this operand is only generated in matching 4227 format %{ "[$reg]" %} 4228 interface(MEMORY_INTER) %{ 4229 base(0x4); // ESP 4230 index(0x4); // No Index 4231 scale(0x0); // No Scale 4232 disp($reg); // Stack Offset 4233 %} 4234 %} 4235 4236 operand stackSlotD(sRegD reg) %{ 4237 constraint(ALLOC_IN_RC(stack_slots)); 4238 // No match rule because this operand is only generated in matching 4239 format %{ "[$reg]" %} 4240 interface(MEMORY_INTER) %{ 4241 base(0x4); // ESP 4242 index(0x4); // No Index 4243 scale(0x0); // No Scale 4244 disp($reg); // Stack Offset 4245 %} 4246 %} 4247 4248 operand stackSlotL(sRegL reg) %{ 4249 constraint(ALLOC_IN_RC(stack_slots)); 4250 // No match rule because this operand is only generated in matching 4251 format %{ "[$reg]" %} 4252 interface(MEMORY_INTER) %{ 4253 base(0x4); // ESP 4254 index(0x4); // No Index 4255 scale(0x0); // No Scale 4256 disp($reg); // Stack Offset 4257 %} 4258 %} 4259 4260 //----------Conditional Branch Operands---------------------------------------- 4261 // Comparison Op - This is the operation of the comparison, and is limited to 4262 // the following set of codes: 4263 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) 
4264 // 4265 // Other attributes of the comparison, such as unsignedness, are specified 4266 // by the comparison instruction that sets a condition code flags register. 4267 // That result is represented by a flags operand whose subtype is appropriate 4268 // to the unsignedness (etc.) of the comparison. 4269 // 4270 // Later, the instruction which matches both the Comparison Op (a Bool) and 4271 // the flags (produced by the Cmp) specifies the coding of the comparison op 4272 // by matching a specific subtype of Bool operand below, such as cmpOpU. 4273 4274 // Comparison Code 4275 operand cmpOp() %{ 4276 match(Bool); 4277 4278 format %{ "" %} 4279 interface(COND_INTER) %{ 4280 equal(0x4, "e"); 4281 not_equal(0x5, "ne"); 4282 less(0xC, "l"); 4283 greater_equal(0xD, "ge"); 4284 less_equal(0xE, "le"); 4285 greater(0xF, "g"); 4286 overflow(0x0, "o"); 4287 no_overflow(0x1, "no"); 4288 %} 4289 %} 4290 4291 // Comparison Code, unsigned compare. Used by FP also, with 4292 // C2 (unordered) turned into GT or LT already. The other bits 4293 // C0 and C3 are turned into Carry & Zero flags. 
4294 operand cmpOpU() %{ 4295 match(Bool); 4296 4297 format %{ "" %} 4298 interface(COND_INTER) %{ 4299 equal(0x4, "e"); 4300 not_equal(0x5, "ne"); 4301 less(0x2, "b"); 4302 greater_equal(0x3, "nb"); 4303 less_equal(0x6, "be"); 4304 greater(0x7, "nbe"); 4305 overflow(0x0, "o"); 4306 no_overflow(0x1, "no"); 4307 %} 4308 %} 4309 4310 // Floating comparisons that don't require any fixup for the unordered case 4311 operand cmpOpUCF() %{ 4312 match(Bool); 4313 predicate(n->as_Bool()->_test._test == BoolTest::lt || 4314 n->as_Bool()->_test._test == BoolTest::ge || 4315 n->as_Bool()->_test._test == BoolTest::le || 4316 n->as_Bool()->_test._test == BoolTest::gt); 4317 format %{ "" %} 4318 interface(COND_INTER) %{ 4319 equal(0x4, "e"); 4320 not_equal(0x5, "ne"); 4321 less(0x2, "b"); 4322 greater_equal(0x3, "nb"); 4323 less_equal(0x6, "be"); 4324 greater(0x7, "nbe"); 4325 overflow(0x0, "o"); 4326 no_overflow(0x1, "no"); 4327 %} 4328 %} 4329 4330 4331 // Floating comparisons that can be fixed up with extra conditional jumps 4332 operand cmpOpUCF2() %{ 4333 match(Bool); 4334 predicate(n->as_Bool()->_test._test == BoolTest::ne || 4335 n->as_Bool()->_test._test == BoolTest::eq); 4336 format %{ "" %} 4337 interface(COND_INTER) %{ 4338 equal(0x4, "e"); 4339 not_equal(0x5, "ne"); 4340 less(0x2, "b"); 4341 greater_equal(0x3, "nb"); 4342 less_equal(0x6, "be"); 4343 greater(0x7, "nbe"); 4344 overflow(0x0, "o"); 4345 no_overflow(0x1, "no"); 4346 %} 4347 %} 4348 4349 // Comparison Code for FP conditional move 4350 operand cmpOp_fcmov() %{ 4351 match(Bool); 4352 4353 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 4354 n->as_Bool()->_test._test != BoolTest::no_overflow); 4355 format %{ "" %} 4356 interface(COND_INTER) %{ 4357 equal (0x0C8); 4358 not_equal (0x1C8); 4359 less (0x0C0); 4360 greater_equal(0x1C0); 4361 less_equal (0x0D0); 4362 greater (0x1D0); 4363 overflow(0x0, "o"); // not really supported by the instruction 4364 no_overflow(0x1, "no"); // not really supported 
by the instruction 4365 %} 4366 %} 4367 4368 // Comparison Code used in long compares 4369 operand cmpOp_commute() %{ 4370 match(Bool); 4371 4372 format %{ "" %} 4373 interface(COND_INTER) %{ 4374 equal(0x4, "e"); 4375 not_equal(0x5, "ne"); 4376 less(0xF, "g"); 4377 greater_equal(0xE, "le"); 4378 less_equal(0xD, "ge"); 4379 greater(0xC, "l"); 4380 overflow(0x0, "o"); 4381 no_overflow(0x1, "no"); 4382 %} 4383 %} 4384 4385 // Comparison Code used in unsigned long compares 4386 operand cmpOpU_commute() %{ 4387 match(Bool); 4388 4389 format %{ "" %} 4390 interface(COND_INTER) %{ 4391 equal(0x4, "e"); 4392 not_equal(0x5, "ne"); 4393 less(0x7, "nbe"); 4394 greater_equal(0x6, "be"); 4395 less_equal(0x3, "nb"); 4396 greater(0x2, "b"); 4397 overflow(0x0, "o"); 4398 no_overflow(0x1, "no"); 4399 %} 4400 %} 4401 4402 //----------OPERAND CLASSES---------------------------------------------------- 4403 // Operand Classes are groups of operands that are used as to simplify 4404 // instruction definitions by not requiring the AD writer to specify separate 4405 // instructions for every form of operand when the instruction accepts 4406 // multiple operand types with the same basic encoding and format. The classic 4407 // case of this is memory operands. 4408 4409 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, 4410 indIndex, indIndexScale, indIndexScaleOffset); 4411 4412 // Long memory operations are encoded in 2 instructions and a +4 offset. 4413 // This means some kind of offset is always required and you cannot use 4414 // an oop as the offset (done when working on static globals). 4415 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset, 4416 indIndex, indIndexScale, indIndexScaleOffset); 4417 4418 4419 //----------PIPELINE----------------------------------------------------------- 4420 // Rules which define the behavior of the target architectures pipeline. 
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
// Or:   _mem if it requires the big decoder and a memory unit.
4464 4465 // Integer ALU reg operation 4466 pipe_class ialu_reg(rRegI dst) %{ 4467 single_instruction; 4468 dst : S4(write); 4469 dst : S3(read); 4470 DECODE : S0; // any decoder 4471 ALU : S3; // any alu 4472 %} 4473 4474 // Long ALU reg operation 4475 pipe_class ialu_reg_long(eRegL dst) %{ 4476 instruction_count(2); 4477 dst : S4(write); 4478 dst : S3(read); 4479 DECODE : S0(2); // any 2 decoders 4480 ALU : S3(2); // both alus 4481 %} 4482 4483 // Integer ALU reg operation using big decoder 4484 pipe_class ialu_reg_fat(rRegI dst) %{ 4485 single_instruction; 4486 dst : S4(write); 4487 dst : S3(read); 4488 D0 : S0; // big decoder only 4489 ALU : S3; // any alu 4490 %} 4491 4492 // Long ALU reg operation using big decoder 4493 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4494 instruction_count(2); 4495 dst : S4(write); 4496 dst : S3(read); 4497 D0 : S0(2); // big decoder only; twice 4498 ALU : S3(2); // any 2 alus 4499 %} 4500 4501 // Integer ALU reg-reg operation 4502 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4503 single_instruction; 4504 dst : S4(write); 4505 src : S3(read); 4506 DECODE : S0; // any decoder 4507 ALU : S3; // any alu 4508 %} 4509 4510 // Long ALU reg-reg operation 4511 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4512 instruction_count(2); 4513 dst : S4(write); 4514 src : S3(read); 4515 DECODE : S0(2); // any 2 decoders 4516 ALU : S3(2); // both alus 4517 %} 4518 4519 // Integer ALU reg-reg operation 4520 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4521 single_instruction; 4522 dst : S4(write); 4523 src : S3(read); 4524 D0 : S0; // big decoder only 4525 ALU : S3; // any alu 4526 %} 4527 4528 // Long ALU reg-reg operation 4529 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4530 instruction_count(2); 4531 dst : S4(write); 4532 src : S3(read); 4533 D0 : S0(2); // big decoder only; twice 4534 ALU : S3(2); // both alus 4535 %} 4536 4537 // Integer ALU reg-mem operation 4538 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4539 single_instruction; 4540 dst : S5(write); 4541 mem : S3(read); 4542 D0 : S0; // big decoder only 4543 ALU : S4; // any alu 4544 MEM : S3; // any mem 4545 %} 4546 4547 // Long ALU reg-mem operation 4548 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4549 instruction_count(2); 4550 dst : S5(write); 4551 mem : S3(read); 4552 D0 : S0(2); // big decoder only; twice 4553 ALU : S4(2); // any 2 alus 4554 MEM : S3(2); // both mems 4555 %} 4556 4557 // Integer mem operation (prefetch) 4558 pipe_class ialu_mem(memory mem) 4559 %{ 4560 single_instruction; 4561 mem : S3(read); 4562 D0 : S0; // big decoder only 4563 MEM : S3; // any mem 4564 %} 4565 4566 // Integer Store to Memory 4567 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4568 single_instruction; 4569 mem : S3(read); 4570 src : S5(read); 4571 D0 : S0; // big decoder only 4572 ALU : S4; // any alu 4573 MEM : S3; 4574 %} 4575 4576 // Long Store to Memory 4577 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4578 instruction_count(2); 4579 mem : S3(read); 4580 src : S5(read); 4581 D0 : S0(2); // big decoder only; twice 4582 ALU : S4(2); // any 2 alus 4583 MEM : S3(2); // Both mems 4584 %} 4585 4586 // Integer Store to Memory 4587 pipe_class ialu_mem_imm(memory mem) %{ 4588 single_instruction; 4589 mem : S3(read); 4590 D0 : S0; // big decoder only 4591 ALU : S4; // any alu 4592 MEM : S3; 4593 %} 4594 4595 // Integer ALU0 reg-reg operation 4596 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4597 single_instruction; 4598 dst : S4(write); 4599 src : S3(read); 4600 D0 : S0; // Big decoder only 4601 ALU0 : S3; // only alu0 4602 %} 4603 4604 // Integer ALU0 reg-mem operation 4605 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4606 single_instruction; 4607 dst : S5(write); 4608 mem : S3(read); 4609 D0 : S0; // big decoder only 4610 ALU0 : S4; // ALU0 only 4611 MEM : S3; // any mem 4612 %} 4613 4614 // Integer ALU reg-reg operation 4615 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4616 single_instruction; 4617 cr : S4(write); 4618 src1 : S3(read); 4619 src2 : S3(read); 4620 DECODE : S0; // any decoder 4621 ALU : S3; // any alu 4622 %} 4623 4624 // Integer ALU reg-imm operation 4625 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4626 single_instruction; 4627 cr : S4(write); 4628 src1 : S3(read); 4629 DECODE : S0; // any decoder 4630 ALU : S3; // any alu 4631 %} 4632 4633 // Integer ALU reg-mem operation 4634 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4635 single_instruction; 4636 cr : S4(write); 4637 src1 : S3(read); 4638 src2 : S3(read); 4639 D0 : S0; // big decoder only 4640 ALU : S4; // any alu 4641 MEM : S3; 4642 %} 4643 4644 // Conditional move reg-reg 4645 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4646 instruction_count(4); 4647 y : S4(read); 4648 q : S3(read); 4649 p : S3(read); 4650 DECODE : S0(4); // any decoder 4651 %} 4652 4653 // Conditional move reg-reg 4654 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4655 single_instruction; 4656 dst : S4(write); 4657 src : S3(read); 4658 cr : S3(read); 4659 DECODE : S0; // any decoder 4660 %} 4661 4662 // Conditional move reg-mem 4663 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4664 single_instruction; 4665 dst : S4(write); 4666 src : S3(read); 4667 cr : S3(read); 4668 DECODE : S0; // any decoder 4669 MEM : S3; 4670 %} 4671 4672 // Conditional move reg-reg long 4673 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4674 single_instruction; 4675 dst : S4(write); 4676 src : S3(read); 4677 cr : S3(read); 4678 DECODE : S0(2); // any 2 decoders 4679 %} 4680 4681 // Conditional move double reg-reg 4682 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4683 single_instruction; 4684 dst : S4(write); 4685 src : S3(read); 4686 cr : S3(read); 4687 DECODE : S0; // any decoder 4688 %} 4689 4690 // Float reg-reg operation 4691 pipe_class fpu_reg(regDPR 
dst) %{
  instruction_count(2);
  dst    : S3(read);
  DECODE : S0(2);       // any 2 decoders
  FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
  instruction_count(2);
  dst    : S4(write);
  src    : S3(read);
  DECODE : S0(2);       // any 2 decoders
  FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
  instruction_count(3);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  DECODE : S0(3);       // any 3 decoders
  FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  src3   : S3(read);
  DECODE : S0(4);       // any 4 decoders
  FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  src3   : S3(read);
  DECODE : S1(3);       // any 3 decoders
  D0     : S0;          // Big decoder only
  FPU    : S3(2);
  MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
  instruction_count(2);
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0;          // big decoder only
  DECODE : S1;          // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;          // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
  instruction_count(3);
  dst    : S5(write);
  src1   : S3(read);
  mem    : S3(read);
  D0     : S0;          // big decoder only
  DECODE : S1(2);       // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;          // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
  // (tail of a pipe_class whose header is above this chunk)
  instruction_count(2);
  src    : S5(read);
  mem    : S3(read);
  DECODE : S0;        // any decoder for FPU PUSH
  D0     : S1;        // big decoder only
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// FPU op with a memory operand and two double-precision register sources.
pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
  instruction_count(3);
  src1   : S3(read);
  src2   : S3(read);
  mem    : S3(read);
  DECODE : S0(2);     // any decoder for FPU PUSH
  D0     : S1;        // big decoder only
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// FPU op with a memory operand, one register source and one memory source.
pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
  instruction_count(3);
  src1   : S3(read);
  src2   : S3(read);
  mem    : S4(read);
  DECODE : S0;        // any decoder for FPU PUSH
  D0     : S0(2);     // big decoder only
  FPU    : S4;
  MEM    : S3(2);     // any mem
%}

// FPU memory-to-memory op.
pipe_class fpu_mem_mem(memory dst, memory src1) %{
  instruction_count(2);
  src1   : S3(read);
  dst    : S4(read);
  D0     : S0(2);     // big decoder only
  MEM    : S3(2);     // any mem
%}

// FPU memory op with two memory sources.
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
  instruction_count(3);
  src1   : S3(read);
  src2   : S3(read);
  dst    : S4(read);
  D0     : S0(3);     // big decoder only
  FPU    : S4;
  MEM    : S3(3);     // any mem
%}

// FPU op with a memory operand, a register source and an implicit constant.
pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
  instruction_count(3);
  src1   : S4(read);
  mem    : S4(read);
  DECODE : S0;        // any decoder for FPU PUSH
  D0     : S0(2);     // big decoder only
  FPU    : S4;
  MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
  instruction_count(2);
  dst    : S5(write);
  D0     : S0;        // big decoder only for the load
  DECODE : S1;        // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
  instruction_count(3);
  dst    : S5(write);
  src    : S3(read);
  D0     : S0;        // big decoder only for the load
  DECODE : S1(2);     // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
  single_instruction;
  BR : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
  single_instruction;
  cr : S1(read);
  BR : S3;
%}

// Allocation idiom (serialized compare-and-exchange sequence)
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
  instruction_count(1); force_serialization;
  fixed_latency(6);
  heap_ptr : S3(read);
  DECODE   : S0(3);
  D0       : S2;
  MEM      : S3;
  ALU      : S3(2);
  dst      : S5(write);
  BR       : S5;
%}

// Generic big/slow expanded idiom — catch-all class for expensive
// multi-instruction expansions; pessimistic fixed latency.
pipe_class pipe_slow( ) %{
  instruction_count(10); multiple_bundles; force_serialization;
  fixed_latency(100);
  D0  : S0(2);
  MEM : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
  instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided. These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.
//               The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// These nodes never survive to code emission; if one is ever emitted the
// encoding asserts via ShouldNotReachHere().
// Load Float
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}



// Load Double
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

//----------BSWAP-Instruction--------------------------------------------------
// Reverse the byte order of an int, in place.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Reverse a 64-bit value held as a register pair: BSWAP each 32-bit half,
// then exchange the halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Byte-reverse an unsigned short: BSWAP the full 32 bits, then logical
// shift right by 16 so the reversed halfword is zero-extended.
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Byte-reverse a signed short: BSWAP the full 32 bits, then arithmetic
// shift right by 16 so the reversed halfword is sign-extended.
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

// Fast path: hardware LZCNT when the CPU supports it.
instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback for CPUs without LZCNT: BSR gives the index of the highest set
// bit, which is converted to a leading-zero count below.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst
              (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
            "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);                 // index of highest set bit; ZF set if src == 0
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);                   // src == 0: use -1 so the result below becomes 32
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);       // nlz = 31 - bit_index
  %}
  ins_pipe(ialu_reg);
%}

// 64-bit leading-zero count via LZCNT on each half. LZCNT sets CF when its
// source is all zeros, so CF clear after the high-word count means the
// answer is already in $dst; otherwise count the low word and add 32.
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// 64-bit leading-zero count fallback using BSR: try the high word first,
// fall back to the low word (bit index biased by 32), then convert the
// bit index to a zero count (nlz = 63 - bit_index).
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);           // bit index is relative to bit 32
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);                   // whole long is zero: result becomes 64
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);      // nlz = 63 - bit_index
  %}
  ins_pipe(ialu_reg);
%}

// Fast path: hardware TZCNT when the CPU supports it.
instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback for CPUs without TZCNT: BSF already yields the trailing-zero
// count for a non-zero source; patch in 32 for source == 0.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// 64-bit trailing-zero count via TZCNT on each half. TZCNT sets CF when
// its source is all zeros, so CF clear after the low-word count means the
// answer is already in $dst; otherwise count the high word and add 32.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// 64-bit trailing-zero count fallback using BSF: low word first, then high
// word biased by 32; a fully-zero long yields 32 + 32 = 64.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
            "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// 64-bit popcount = popcount(lo) + popcount(hi).
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Memory form: count each 32-bit half directly from memory ($mem and $mem+4).
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); // zero-extend: high word is always 0
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 bits of the mask are relevant after the zero-extending byte load.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI
               dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
// Matches (short << 24) >> 24, i.e. narrowing to a signed byte; only the
// low byte survives, so a sign-extending byte load suffices.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// Matches (char << 24) >> 24; only the low byte survives, so a
// sign-extending byte load suffices.
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// The 0xFF mask reduces the load to a zero-extended byte load.
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 bits of the mask matter after the zero-extending halfword load.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
// Matches (int << 24) >> 24; folds into a single sign-extending byte load.
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
// Matches (int & 0xFF); folds into a single zero-extending byte load.
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
// Matches (int << 16) >> 16; folds into a single sign-extending halfword load.
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
// Matches (int & 0xFFFF); folds into a single zero-extending halfword load.
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // replicate the sign bit into the high word
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
// The 0xFF mask reduces the load to a zero-extended byte load.
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
// The 0xFFFF mask reduces the load to a zero-extended halfword load.
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
// A 31-bit mask clears the sign bit, so the high word is simply zero.
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit moves; this form is only used when atomicity is not required.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
// Atomic 64-bit load via the x87 unit (UseSSE <= 1): FILD/FISTP move the
// whole 64 bits in one memory access each.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic 64-bit load via an XMM register (UseSSE >= 2), result left in a
// stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic 64-bit load via an XMM register, result split into an integer
// register pair (low word, then high word after a 32-bit logical shift).
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 path, UseSSE <= 1)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// XMM double load when clearing the upper half is undesirable
// (!UseXmmLoadAndClearUpper); the assembler emits MOVLPD in this mode.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 path, UseSSE == 0)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address — one LEA variant per addressing-mode operand.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( SetInstMark, LdImmI(dst, src), ClearInstMark );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero — XOR is shorter than MOV 0 but clobbers flags.
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
format %{ "XOR $dst,$dst" %} 5872 opcode(0x33); /* + rd */ 5873 ins_encode( OpcP, RegReg( dst, dst ) ); 5874 ins_pipe( ialu_reg ); 5875 %} 5876 5877 instruct loadConP(eRegP dst, immP src) %{ 5878 match(Set dst src); 5879 5880 format %{ "MOV $dst,$src" %} 5881 opcode(0xB8); /* + rd */ 5882 ins_encode( SetInstMark, LdImmP(dst, src), ClearInstMark ); 5883 ins_pipe( ialu_reg_fat ); 5884 %} 5885 5886 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{ 5887 match(Set dst src); 5888 effect(KILL cr); 5889 ins_cost(200); 5890 format %{ "MOV $dst.lo,$src.lo\n\t" 5891 "MOV $dst.hi,$src.hi" %} 5892 opcode(0xB8); 5893 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); 5894 ins_pipe( ialu_reg_long_fat ); 5895 %} 5896 5897 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{ 5898 match(Set dst src); 5899 effect(KILL cr); 5900 ins_cost(150); 5901 format %{ "XOR $dst.lo,$dst.lo\n\t" 5902 "XOR $dst.hi,$dst.hi" %} 5903 opcode(0x33,0x33); 5904 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); 5905 ins_pipe( ialu_reg_long ); 5906 %} 5907 5908 // The instruction usage is guarded by predicate in operand immFPR(). 5909 instruct loadConFPR(regFPR dst, immFPR con) %{ 5910 match(Set dst con); 5911 ins_cost(125); 5912 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 5913 "FSTP $dst" %} 5914 ins_encode %{ 5915 __ fld_s($constantaddress($con)); 5916 __ fstp_d($dst$$reg); 5917 %} 5918 ins_pipe(fpu_reg_con); 5919 %} 5920 5921 // The instruction usage is guarded by predicate in operand immFPR0(). 5922 instruct loadConFPR0(regFPR dst, immFPR0 con) %{ 5923 match(Set dst con); 5924 ins_cost(125); 5925 format %{ "FLDZ ST\n\t" 5926 "FSTP $dst" %} 5927 ins_encode %{ 5928 __ fldz(); 5929 __ fstp_d($dst$$reg); 5930 %} 5931 ins_pipe(fpu_reg_con); 5932 %} 5933 5934 // The instruction usage is guarded by predicate in operand immFPR1(). 
// FLD1 pushes +1.0 directly; no constant-table access needed.
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
// SSE float constant: loaded from the constant table into an XMM register.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// XORPS reg,reg yields 0.0 without a memory load.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
// x87 double constant from the constant table.
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// SSE2 double constant: loaded from the constant table into an XMM register.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
// XORPD reg,reg yields 0.0 without a memory load.
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Long stack slot: two 32-bit loads, low half then high half (at +4).
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
// Float via the x87 stack: FLD from the slot, FSTP into dst.
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// Exactly one variant applies at runtime; selection is driven by the
// UseSSE and AllocatePrefetchInstr flags in the predicates below.

// Non-SSE fallback: no prefetch instruction available, emit nothing.
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// 0x66 operand-size prefix turns the 32-bit store into a 16-bit store.
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( SetInstMark, OpcS, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
// Non-atomic variant: two 32-bit stores; the atomic case is handled by
// the *_volatile instructs below.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// Only the low 32 bits of the long are stored.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( SetInstMark, OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: a single 64-bit MOVSD through an XMM temp is atomic.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant taking the long from a register pair: both halves are
// moved into XMM registers and combined with PUNPCKLDQ before the
// single atomic 64-bit store.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SetInstMark, SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32( src ), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Double
// x87 path (UseSSE<=1): source must already be at the FPU top of stack
// (regDPR1).
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// Store Float
// x87 path (UseSSE==0): source must be at the FPU top of stack (regFPR1).
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// Double-to-float conversion is folded into the 32-bit store.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
// The float is stored as its raw 32-bit pattern via an integer MOV.
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
// Two 32-bit stores: low half, then high half at +4.
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors
// On x86, acquire/release/storestore ordering is provided by the memory
// model itself, so those barriers emit no code (size(0), empty encoding);
// only the StoreLoad (volatile) barrier needs an actual instruction.

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// StoreLoad barrier: emitted as a locked ADD to the stack (see
// MacroAssembler::membar); clobbers EFLAGS, hence KILL cr.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// A preceding store already provides the StoreLoad ordering
// (checked by Matcher::post_store_load_barrier), so emit nothing.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// CastX2P pins src and dst to the same register (EAX), so no code is emitted.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
// Fallback for CPUs without CMOV: branch around a plain MOV.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// True CMOVcc (0x0F 0x40+cc) for CPUs that support it.
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
// x87 FCMOVcc (opcode 0xDA family); destination must be the FPU
// top-of-stack (regDPR1).
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed compares therefore use a conditional branch around an FPU move.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
// Emulated with a branch around an XMM move.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move: one CMOV per 32-bit half of the register pair.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of constant 1: single-byte INC (0x40 + reg), guarded by UseIncDec.
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Non-destructive add via LEA: no flags are written, so no KILL cr.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
  ins_pipe( ialu_reg_reg );
%}

// Add of constant -1: single-byte DEC (0x48 + reg), guarded by UseIncDec.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
7004 %} 7005 7006 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7007 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7008 effect(KILL cr); 7009 7010 ins_cost(150); 7011 format %{ "ADD $dst,$src" %} 7012 opcode(0x01); /* Opcode 01 /r */ 7013 ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); 7014 ins_pipe( ialu_mem_reg ); 7015 %} 7016 7017 // Add Memory with Immediate 7018 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7019 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7020 effect(KILL cr); 7021 7022 ins_cost(125); 7023 format %{ "ADD $dst,$src" %} 7024 opcode(0x81); /* Opcode 81 /0 id */ 7025 ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32(src), ClearInstMark ); 7026 ins_pipe( ialu_mem_imm ); 7027 %} 7028 7029 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{ 7030 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7031 effect(KILL cr); 7032 7033 ins_cost(125); 7034 format %{ "INC $dst" %} 7035 opcode(0xFF); /* Opcode FF /0 */ 7036 ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,dst), ClearInstMark); 7037 ins_pipe( ialu_mem_imm ); 7038 %} 7039 7040 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7041 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7042 effect(KILL cr); 7043 7044 ins_cost(125); 7045 format %{ "DEC $dst" %} 7046 opcode(0xFF); /* Opcode FF /1 */ 7047 ins_encode( SetInstMark, OpcP, RMopc_Mem(0x01,dst), ClearInstMark); 7048 ins_pipe( ialu_mem_imm ); 7049 %} 7050 7051 7052 instruct checkCastPP( eRegP dst ) %{ 7053 match(Set dst (CheckCastPP dst)); 7054 7055 size(0); 7056 format %{ "#checkcastPP of $dst" %} 7057 ins_encode( /*empty encoding*/ ); 7058 ins_pipe( empty ); 7059 %} 7060 7061 instruct castPP( eRegP dst ) %{ 7062 match(Set dst (CastPP dst)); 7063 format %{ "#castPP of $dst" %} 7064 ins_encode( /*empty encoding*/ ); 7065 ins_pipe( empty ); 7066 %} 7067 7068 instruct castII( rRegI dst ) %{ 7069 match(Set dst (CastII dst)); 7070 format %{ "#castII 
of $dst" %} 7071 ins_encode( /*empty encoding*/ ); 7072 ins_cost(0); 7073 ins_pipe( empty ); 7074 %} 7075 7076 instruct castLL( eRegL dst ) %{ 7077 match(Set dst (CastLL dst)); 7078 format %{ "#castLL of $dst" %} 7079 ins_encode( /*empty encoding*/ ); 7080 ins_cost(0); 7081 ins_pipe( empty ); 7082 %} 7083 7084 instruct castFF( regF dst ) %{ 7085 predicate(UseSSE >= 1); 7086 match(Set dst (CastFF dst)); 7087 format %{ "#castFF of $dst" %} 7088 ins_encode( /*empty encoding*/ ); 7089 ins_cost(0); 7090 ins_pipe( empty ); 7091 %} 7092 7093 instruct castDD( regD dst ) %{ 7094 predicate(UseSSE >= 2); 7095 match(Set dst (CastDD dst)); 7096 format %{ "#castDD of $dst" %} 7097 ins_encode( /*empty encoding*/ ); 7098 ins_cost(0); 7099 ins_pipe( empty ); 7100 %} 7101 7102 instruct castFF_PR( regFPR dst ) %{ 7103 predicate(UseSSE < 1); 7104 match(Set dst (CastFF dst)); 7105 format %{ "#castFF of $dst" %} 7106 ins_encode( /*empty encoding*/ ); 7107 ins_cost(0); 7108 ins_pipe( empty ); 7109 %} 7110 7111 instruct castDD_PR( regDPR dst ) %{ 7112 predicate(UseSSE < 2); 7113 match(Set dst (CastDD dst)); 7114 format %{ "#castDD of $dst" %} 7115 ins_encode( /*empty encoding*/ ); 7116 ins_cost(0); 7117 ins_pipe( empty ); 7118 %} 7119 7120 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7121 7122 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7123 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7124 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval))); 7125 effect(KILL cr, KILL oldval); 7126 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7127 "MOV $res,0\n\t" 7128 "JNE,s fail\n\t" 7129 "MOV $res,1\n" 7130 "fail:" %} 7131 ins_encode( enc_cmpxchg8(mem_ptr), 7132 enc_flags_ne_to_boolean(res) ); 7133 ins_pipe( pipe_cmpxchg ); 7134 %} 7135 7136 instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, 
eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndExchange variants: unlike CompareAndSwap above, the observed
// memory value is returned in oldval (EAX / EDX:EAX) rather than a boolean.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAdd whose result is unused: plain locked ADD, no XADD needed.
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDS [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDS [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// XCHG with a memory operand is implicitly locked, so no explicit lock()
// prefix is emitted for the GetAndSet family below.
// Important to match to xRegI: only 8-bit regs.
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// 0 - dst is encoded as a single NEG.
instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into EAX only (low word of the EDX:EAX pair);
// used as a feeder for the multiply-high patterns below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( SetInstMark, OpcSE(imm), RegMem( dst, src ), Con8or32( imm ), ClearInstMark );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( SetInstMark, OpcS, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// dst + src1*src1 + src2*src3: expands into two multiplies and an add.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
            mulI_eReg(src2, src3, cr);
            addI_eReg(dst, src2, cr); %}
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe(
pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  // hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// The CMP/JNE preamble short-circuits the min_jint / -1 case, which would
// otherwise raise #DE (overflow) in IDIV.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (DivL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (ModL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrough for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ?
con : -con; 7811 Label Lfast, Lpos, Ldone; 7812 7813 __ movl($tmp$$Register, pcon); 7814 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7815 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7816 7817 __ movl($tmp2$$Register, $dst$$Register); // save 7818 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7819 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7820 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7821 7822 // Negative dividend. 7823 // convert value to positive to use unsigned division 7824 __ lneg($dst$$Register, $tmp2$$Register); 7825 __ divl($tmp$$Register); 7826 __ movl($dst$$Register, $tmp2$$Register); 7827 __ divl($tmp$$Register); 7828 // revert remainder back to negative 7829 __ negl(HIGH_FROM_LOW($dst$$Register)); 7830 __ jmpb(Ldone); 7831 7832 __ bind(Lpos); 7833 __ divl($tmp$$Register); 7834 __ movl($dst$$Register, $tmp2$$Register); 7835 7836 __ bind(Lfast); 7837 // fast path: src is positive 7838 __ divl($tmp$$Register); 7839 7840 __ bind(Ldone); 7841 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7842 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 7843 7844 %} 7845 ins_pipe( pipe_slow ); 7846 %} 7847 7848 // Integer Shift Instructions 7849 // Shift Left by one 7850 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 7851 match(Set dst (LShiftI dst shift)); 7852 effect(KILL cr); 7853 7854 size(2); 7855 format %{ "SHL $dst,$shift" %} 7856 opcode(0xD1, 0x4); /* D1 /4 */ 7857 ins_encode( OpcP, RegOpc( dst ) ); 7858 ins_pipe( ialu_reg ); 7859 %} 7860 7861 // Shift Left by 8-bit immediate 7862 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7863 match(Set dst (LShiftI dst shift)); 7864 effect(KILL cr); 7865 7866 size(3); 7867 format %{ "SHL $dst,$shift" %} 7868 opcode(0xC1, 0x4); /* C1 /4 ib */ 7869 ins_encode( RegOpcImm( dst, shift) ); 7870 ins_pipe( ialu_reg ); 7871 %} 7872 7873 // Shift Left by variable 7874 instruct 
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one, operating on a value in memory
// (matched as load / shift / store back to the same address).
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary,dst), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate, memory operand
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary, dst ), Con8or32(shift), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable (shift count in CL)
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable (shift count in CL)
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src,
eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: matched from (src1 ^ -1) & src2, i.e. ~src1 & src2.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: matched from (0 - src) & src (isolate lowest set bit).
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: matched from (src + -1) ^ src.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: matched from (src + -1) & src (reset lowest set bit).
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// These match no ideal node themselves; they are the targets used by the
// expand %{ %} blocks of the rotate match rules below.
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate: only match when the two shift counts sum to 0 mod 32,
// i.e. the shl/shr pair really is a rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// XOR with -1 is bitwise NOT; NOT does not affect flags, so no KILL cr.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06); /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( SetInstMark, OpcP, RegMem(dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode(
SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6); /* Opcode 81 /6 id */
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NOTE(review): Conv2B helper — relies on NEG setting CF for non-zero input
// and ADC folding that back in; verify expected 0/non-zero result convention
// against Conv2B users.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer variant of ci2b above.
instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

instruct cmpLTMask(eCXRegI
dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // Produces an all-ones mask when p < q (SETlt yields 0/1, NEG turns 1
  // into -1), otherwise zero.
  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // (removed an unused 'Label done;' — the encoding is branch-free)
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// p < 0 ? -1 : 0 computed by arithmetic shift of the sign bit.
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// Fused form of p + ((p < q ? -1 : 0) & y) + (p - q): subtract, then
// conditionally add y only when the subtraction went negative.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// y &= (p < q ? -1 : 0): keep y when p < q, else zero it.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
__ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These rules produce only the flags result (Set cr ...); the overflow
// branch is matched separately against cr.

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtraction overflow can be checked with CMP alone (no operand is
// clobbered), so no USE_KILL here.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 0 - op2 is encoded as NEG, which clobbers op2.
instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int"
%}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Immediate form writes into a TEMP so neither input is clobbered.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Integer Absolute Instructions
// abs(x) = (x ^ (x >> 31)) - (x >> 31): branch-free sign-mask trick.
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
  %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);
    __ subl($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

//----------Long Instructions------------------------------------------------
// A long lives in a register pair: $dst is the low word and
// HIGH_FROM_LOW($dst) the high word; ops are done low-then-high with carry.
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct
addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8733 match(Set dst (AddL dst src)); 8734 effect(KILL cr); 8735 format %{ "ADD $dst.lo,$src.lo\n\t" 8736 "ADC $dst.hi,$src.hi" %} 8737 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */ 8738 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8739 ins_pipe( ialu_reg_long ); 8740 %} 8741 8742 // Add Long Register with Memory 8743 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8744 match(Set dst (AddL dst (LoadL mem))); 8745 effect(KILL cr); 8746 ins_cost(125); 8747 format %{ "ADD $dst.lo,$mem\n\t" 8748 "ADC $dst.hi,$mem+4" %} 8749 opcode(0x03, 0x13); 8750 ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark ); 8751 ins_pipe( ialu_reg_long_mem ); 8752 %} 8753 8754 // Subtract Long Register with Register. 8755 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8756 match(Set dst (SubL dst src)); 8757 effect(KILL cr); 8758 ins_cost(200); 8759 format %{ "SUB $dst.lo,$src.lo\n\t" 8760 "SBB $dst.hi,$src.hi" %} 8761 opcode(0x2B, 0x1B); 8762 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8763 ins_pipe( ialu_reg_reg_long ); 8764 %} 8765 8766 // Subtract Long Register with Immediate 8767 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8768 match(Set dst (SubL dst src)); 8769 effect(KILL cr); 8770 format %{ "SUB $dst.lo,$src.lo\n\t" 8771 "SBB $dst.hi,$src.hi" %} 8772 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8773 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8774 ins_pipe( ialu_reg_long ); 8775 %} 8776 8777 // Subtract Long Register with Memory 8778 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8779 match(Set dst (SubL dst (LoadL mem))); 8780 effect(KILL cr); 8781 ins_cost(125); 8782 format %{ "SUB $dst.lo,$mem\n\t" 8783 "SBB $dst.hi,$mem+4" %} 8784 opcode(0x2B, 0x1B); 8785 ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), 
ClearInstMark ); 8786 ins_pipe( ialu_reg_long_mem ); 8787 %} 8788 8789 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8790 match(Set dst (SubL zero dst)); 8791 effect(KILL cr); 8792 ins_cost(300); 8793 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8794 ins_encode( neg_long(dst) ); 8795 ins_pipe( ialu_reg_reg_long ); 8796 %} 8797 8798 // And Long Register with Register 8799 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8800 match(Set dst (AndL dst src)); 8801 effect(KILL cr); 8802 format %{ "AND $dst.lo,$src.lo\n\t" 8803 "AND $dst.hi,$src.hi" %} 8804 opcode(0x23,0x23); 8805 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8806 ins_pipe( ialu_reg_reg_long ); 8807 %} 8808 8809 // And Long Register with Immediate 8810 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8811 match(Set dst (AndL dst src)); 8812 effect(KILL cr); 8813 format %{ "AND $dst.lo,$src.lo\n\t" 8814 "AND $dst.hi,$src.hi" %} 8815 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8816 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8817 ins_pipe( ialu_reg_long ); 8818 %} 8819 8820 // And Long Register with Memory 8821 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8822 match(Set dst (AndL dst (LoadL mem))); 8823 effect(KILL cr); 8824 ins_cost(125); 8825 format %{ "AND $dst.lo,$mem\n\t" 8826 "AND $dst.hi,$mem+4" %} 8827 opcode(0x23, 0x23); 8828 ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark ); 8829 ins_pipe( ialu_reg_long_mem ); 8830 %} 8831 8832 // BMI1 instructions 8833 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 8834 match(Set dst (AndL (XorL src1 minus_1) src2)); 8835 predicate(UseBMI1Instructions); 8836 effect(KILL cr, TEMP dst); 8837 8838 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 8839 "ANDNL $dst.hi, $src1.hi, $src2.hi" 8840 %} 8841 8842 ins_encode %{ 8843 Register Rdst = $dst$$Register; 
8844 Register Rsrc1 = $src1$$Register; 8845 Register Rsrc2 = $src2$$Register; 8846 __ andnl(Rdst, Rsrc1, Rsrc2); 8847 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 8848 %} 8849 ins_pipe(ialu_reg_reg_long); 8850 %} 8851 8852 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ 8853 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 8854 predicate(UseBMI1Instructions); 8855 effect(KILL cr, TEMP dst); 8856 8857 ins_cost(125); 8858 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 8859 "ANDNL $dst.hi, $src1.hi, $src2+4" 8860 %} 8861 8862 ins_encode %{ 8863 Register Rdst = $dst$$Register; 8864 Register Rsrc1 = $src1$$Register; 8865 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 8866 8867 __ andnl(Rdst, Rsrc1, $src2$$Address); 8868 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); 8869 %} 8870 ins_pipe(ialu_reg_mem); 8871 %} 8872 8873 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 8874 match(Set dst (AndL (SubL imm_zero src) src)); 8875 predicate(UseBMI1Instructions); 8876 effect(KILL cr, TEMP dst); 8877 8878 format %{ "MOVL $dst.hi, 0\n\t" 8879 "BLSIL $dst.lo, $src.lo\n\t" 8880 "JNZ done\n\t" 8881 "BLSIL $dst.hi, $src.hi\n" 8882 "done:" 8883 %} 8884 8885 ins_encode %{ 8886 Label done; 8887 Register Rdst = $dst$$Register; 8888 Register Rsrc = $src$$Register; 8889 __ movl(HIGH_FROM_LOW(Rdst), 0); 8890 __ blsil(Rdst, Rsrc); 8891 __ jccb(Assembler::notZero, done); 8892 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8893 __ bind(done); 8894 %} 8895 ins_pipe(ialu_reg); 8896 %} 8897 8898 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 8899 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 8900 predicate(UseBMI1Instructions); 8901 effect(KILL cr, TEMP dst); 8902 8903 ins_cost(125); 8904 format %{ "MOVL $dst.hi, 0\n\t" 8905 "BLSIL $dst.lo, 
$src\n\t" 8906 "JNZ done\n\t" 8907 "BLSIL $dst.hi, $src+4\n" 8908 "done:" 8909 %} 8910 8911 ins_encode %{ 8912 Label done; 8913 Register Rdst = $dst$$Register; 8914 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8915 8916 __ movl(HIGH_FROM_LOW(Rdst), 0); 8917 __ blsil(Rdst, $src$$Address); 8918 __ jccb(Assembler::notZero, done); 8919 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 8920 __ bind(done); 8921 %} 8922 ins_pipe(ialu_reg_mem); 8923 %} 8924 8925 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8926 %{ 8927 match(Set dst (XorL (AddL src minus_1) src)); 8928 predicate(UseBMI1Instructions); 8929 effect(KILL cr, TEMP dst); 8930 8931 format %{ "MOVL $dst.hi, 0\n\t" 8932 "BLSMSKL $dst.lo, $src.lo\n\t" 8933 "JNC done\n\t" 8934 "BLSMSKL $dst.hi, $src.hi\n" 8935 "done:" 8936 %} 8937 8938 ins_encode %{ 8939 Label done; 8940 Register Rdst = $dst$$Register; 8941 Register Rsrc = $src$$Register; 8942 __ movl(HIGH_FROM_LOW(Rdst), 0); 8943 __ blsmskl(Rdst, Rsrc); 8944 __ jccb(Assembler::carryClear, done); 8945 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8946 __ bind(done); 8947 %} 8948 8949 ins_pipe(ialu_reg); 8950 %} 8951 8952 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8953 %{ 8954 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 8955 predicate(UseBMI1Instructions); 8956 effect(KILL cr, TEMP dst); 8957 8958 ins_cost(125); 8959 format %{ "MOVL $dst.hi, 0\n\t" 8960 "BLSMSKL $dst.lo, $src\n\t" 8961 "JNC done\n\t" 8962 "BLSMSKL $dst.hi, $src+4\n" 8963 "done:" 8964 %} 8965 8966 ins_encode %{ 8967 Label done; 8968 Register Rdst = $dst$$Register; 8969 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8970 8971 __ movl(HIGH_FROM_LOW(Rdst), 0); 8972 __ blsmskl(Rdst, $src$$Address); 8973 __ jccb(Assembler::carryClear, done); 8974 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 8975 __ 
bind(done); 8976 %} 8977 8978 ins_pipe(ialu_reg_mem); 8979 %} 8980 8981 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8982 %{ 8983 match(Set dst (AndL (AddL src minus_1) src) ); 8984 predicate(UseBMI1Instructions); 8985 effect(KILL cr, TEMP dst); 8986 8987 format %{ "MOVL $dst.hi, $src.hi\n\t" 8988 "BLSRL $dst.lo, $src.lo\n\t" 8989 "JNC done\n\t" 8990 "BLSRL $dst.hi, $src.hi\n" 8991 "done:" 8992 %} 8993 8994 ins_encode %{ 8995 Label done; 8996 Register Rdst = $dst$$Register; 8997 Register Rsrc = $src$$Register; 8998 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8999 __ blsrl(Rdst, Rsrc); 9000 __ jccb(Assembler::carryClear, done); 9001 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9002 __ bind(done); 9003 %} 9004 9005 ins_pipe(ialu_reg); 9006 %} 9007 9008 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9009 %{ 9010 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); 9011 predicate(UseBMI1Instructions); 9012 effect(KILL cr, TEMP dst); 9013 9014 ins_cost(125); 9015 format %{ "MOVL $dst.hi, $src+4\n\t" 9016 "BLSRL $dst.lo, $src\n\t" 9017 "JNC done\n\t" 9018 "BLSRL $dst.hi, $src+4\n" 9019 "done:" 9020 %} 9021 9022 ins_encode %{ 9023 Label done; 9024 Register Rdst = $dst$$Register; 9025 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9026 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9027 __ blsrl(Rdst, $src$$Address); 9028 __ jccb(Assembler::carryClear, done); 9029 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9030 __ bind(done); 9031 %} 9032 9033 ins_pipe(ialu_reg_mem); 9034 %} 9035 9036 // Or Long Register with Register 9037 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9038 match(Set dst (OrL dst src)); 9039 effect(KILL cr); 9040 format %{ "OR $dst.lo,$src.lo\n\t" 9041 "OR $dst.hi,$src.hi" %} 9042 opcode(0x0B,0x0B); 9043 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9044 ins_pipe( ialu_reg_reg_long ); 9045 %} 9046 9047 
// Or Long Register with Immediate
// 32-bit x86: a Java long lives in a register pair, so the OR is done
// half-at-a-time on the low and high 32-bit words.
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);   // OR writes EFLAGS, so the flags register is clobbered
  format %{ "OR     $dst.lo,$src.lo\n\t"
            "OR     $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);   // costlier than reg-reg form: memory operand
  format %{ "OR     $dst.lo,$mem\n\t"
            "OR     $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// XOR with all-ones is strength-reduced to two NOTs. NOT does not modify
// EFLAGS, which is why this instruct (unlike the other logic ops here)
// declares no eFlagsReg kill.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT    $dst.lo\n\t"
            "NOT    $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR    $dst.lo,$mem\n\t"
            "XOR    $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

// Long shifts on 32-bit x86: the 64-bit value is a register pair, so
// small constant shifts are expanded to ADD/ADC chains (shift-by-1 via
// add-with-carry propagates the bit from the low word into the high word).
// These expanded forms are gated by the UseNewLongLShift flag.

// Shift Left Long by 1
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi\n\t"
            "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi\n\t"
            "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi\n\t"
            "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
// SHLD shifts bits from the low word into the high word, then the low
// word is shifted normally.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
            "SHL    $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
// A shift of >= 32 moves the low word into the high word and zeroes the low.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV    $dst.hi,$dst.lo\n"
            "\tSHL    $dst.hi,$cnt-32\n"
            "\tXOR    $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
// Tests bit 5 of the shift count at runtime to select the small- or
// big-shift sequence; count must be in ECX (eCXRegI).
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST   $shift,32\n\t"
            "JEQ,s  small\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "XOR    $dst.lo,$dst.lo\n"
            "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
            "SHL    $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
            "SHR    $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV    $dst.lo,$dst.hi\n"
            "\tSHR    $dst.lo,$cnt-32\n"
            "\tXOR    $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable (logical: high word zero-filled)
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST   $shift,32\n\t"
            "JEQ,s  small\n\t"
            "MOV    $dst.lo,$dst.hi\n\t"
            "XOR    $dst.hi,$dst.hi\n"
            "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
            "SHR    $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31 (arithmetic: sign-preserving SAR on the high word)
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
            "SAR    $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
// SAR hi,31 fills the high word with the sign bit.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV    $dst.lo,$dst.hi\n"
            "\tSAR    $dst.lo,$cnt-32\n"
            "\tSAR    $dst.hi,31" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST   $shift,32\n\t"
            "JEQ,s  small\n\t"
            "MOV    $dst.lo,$dst.hi\n\t"
            "SAR    $dst.hi,31\n"
            "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
            "SAR    $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}


//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <= 1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);   // fixup sequence uses AH/SAHF
  ins_cost(150);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction\n\t"
            "JNP    exit\n\t"
            "MOV    ah,1       // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP               // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Same compare, but feeding an unsigned-carry-only flags user: no NaN fixup needed.
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <= 1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch (pre-P6 form: status word moved through AX/SAHF)
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD    $src1\n\t"
            "FCOMp  $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST   AX,0x400\n\t"
            "JZ,s   flags\n\t"
            "MOV    AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD  $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD  $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s   exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND     [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(masm);   // rewrite flags so unordered reads as "less than"
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s   exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND     [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(masm);   // rewrite flags so unordered reads as "less than"
  %}
  ins_pipe( pipe_slow );
%}

// Carry-flag-only consumer: no NaN fixup sequence required.
instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV     $dst, #-1\n\t"
            "JP,s    done\n\t"
            "JB,s    done\n\t"
            "SETNE   $dst\n\t"
            "MOVZB   $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV     $dst, #-1\n\t"
            "JP,s    done\n\t"
            "JB,s    done\n\t"
            "SETNE   $dst\n\t"
            "MOVZB   $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// x87 double subtract (register form); dst must already be on the FPU stack.
instruct subDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SubD dst src));

  format %{ "FLD    $src\n\t"
            "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Subtract with an explicit store-and-round to a stack slot (strictfp rounding).
instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE<=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD    $src2\n\t"
            "DSUB   ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}

// Subtract a double loaded directly from memory.
instruct subDPR_reg_mem(regDPR dst, memory src) %{
  predicate (UseSSE<=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD    $src\n\t"
            "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// FABS operates implicitly on the FPU top-of-stack (regDPR1).
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// FCHS (change sign) also operates on the FPU top-of-stack.
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD    $src\n\t"
            "DADD   $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Add with explicit store-and-round to a stack slot (strictfp rounding).
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD    $src2\n\t"
            "DADD   ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}

// Add a double loaded directly from memory.
instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD    $src\n\t"
            "DADDp  $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory: load, add, store back to the same address (round-trip
// through the FPU, with the RoundDouble absorbed by the store).
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D  $dst\n\t"
            "DADD   ST,$src\n\t"
            "FST_D  $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( SetInstMark, Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src), ClearInstMark,
              SetInstMark,
              Opcode(0xDD), RMopc_Mem(0x03,dst),
              ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// Adding the constant 1.0 uses FLD1 (no constant-table load needed).
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp  $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// General double constant from the constant table. The predicate excludes
// 0.0 and 1.0: 1.0 is handled by addDPR_reg_imm1 above, and 0.0 is
// presumably matched/folded by other rules — confirm against the full file.
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp  $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD   ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}

instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD    $src\n\t"
            "DMULp  $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (MulD dst src));
  ins_cost(1);   // Select this instruction for all FP double multiplies

  format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Multiply by a non-trivial double constant from the constant table
// (0.0 and 1.0 excluded by the predicate).
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp  $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// Multiply by a double loaded directly from memory.
instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D  $src\n\t"
            "DMULp  $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D  $mem\n\t"
            "DMUL   ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_reg_mem );
%}


// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2.  This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DADDp  $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD    $src\n\t"
            "FDIVp  $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // FIX: the original declared predicate twice — first `UseSSE<=1`, then
  // `UseSSE<=1 && has_method()`. The second clause implies the first, so the
  // two are merged into the single stricter predicate (this also matches the
  // sibling strictfp_mulDPR_reg above, which has exactly one predicate).
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // was `ins_cost(01)`: same value, written unambiguously.
                 // Select this instruction for all strict FP double divides.

  format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "FDIVp  $dst,ST\n\t"
            "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// x87 double remainder via FPREM (looped inside emitModDPR).
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD   $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE2 double remainder: operands are bounced through the stack to the
// x87 FPREM loop, then the result is moved back into an XMM register.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB    ESP,8\t # DMOD\n"
          "\tMOVSD  [ESP+0],$src1\n"
          "\tFLD_D  [ESP+0]\n"
          "\tMOVSD  [ESP+0],$src0\n"
          "\tFLD_D  [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP     loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD  $dst,[ESP+0]\n"
          "\tADD    ESP,8\n"
          "\tFSTP   ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA   $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA   $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT  $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}

//-------------Float Instructions-------------------------------
// Float Math

// Code for float compare:
//     fcompp();
//     fwait(); fnstsw_ax();
//     sahf();
//     movl(dst, unordered_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);
//   exit:

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction\n\t"
            "JNP    exit\n\t"
            "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP               // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD    $src1\n\t"
            "FCOMp  $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST   AX,0x400\n\t"
            "JZ,s   flags\n\t"
            "MOV    AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF  $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF  $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Float compare (SSE): compare two XMM registers and set EFLAGS.
// UCOMISS leaves PF=1 on an unordered (NaN) result; the fixup sequence
// shown in the format (PUSHF/AND/POPF) folds that into CF=1 so NaN
// compares as "less than" — see emit_cmpfp_fixup.
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe( pipe_slow );
%}

// Cheaper variant for flag users that tolerate the raw UCOMISS result
// (eFlagsRegUCF), so no NaN fixup sequence is emitted.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Same as cmpF_cc, but the right operand is loaded from memory.
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of cmpF_ccCF (no NaN fixup emitted).
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
// Three-way compare (CmpF3): emit_cmpfp3 materializes -1/0/1 in the
// integer dst from the flags set by UCOMISS; unordered (JP) yields -1.
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
// x87 float subtract: result is popped to a stack slot so the store
// rounds the 80-bit x87 result down to single precision.
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// x87 absolute value. FABS (D9 E1) operates on the top of the FPU
// stack, hence the regFPR1 (top-of-stack) constraint on dst and src.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// x87 negate. FCHS (D9 E0) flips the sign of the top of the FPU stack.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark);
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark);
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
// Add a float constant loaded from the constant table.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
// Multiply by a float constant loaded from the constant table.
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
// Float remainder (ModF) via the shared x87 mod helper.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder: spill both operands to the stack and run the
// x87 FPREM loop (retried via JP until the partial-remainder status
// clears, as shown in the format), then move the result back to XMM.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
          "\tMOVSS [ESP+0],$src1\n"
          "\tFLD_S [ESP+0]\n"
          "\tMOVSS [ESP+0],$src0\n"
          "\tFLD_S [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS $dst,[ESP+0]\n"
          "\tADD ESP,4\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!

// Store an x87 register to a float stack slot, rounding to single precision.
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Store an x87 register to a double stack slot, rounding to double precision.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
// x87 double -> SSE float: round through a stack slot, then load into XMM.
// If src is already at the top of the FPU stack (FPR1) we can FST directly;
// otherwise FLD/FSTP to avoid disturbing the stack.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// x87 float -> x87 double is a register-to-register move (widening is exact).
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// SSE float -> x87 double: bounce the value through a stack slot.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// 0x80000000 is the value CVTTSD2SI produces on overflow/NaN; on seeing
// it we take the slow path through the d2i_wrapper stub to get exact
// Java corner-case semantics.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 double -> long; result is returned in the EDX:EAX pair.
// The 0x80000000:0 sentinel check detects overflow/NaN and diverts
// to the d2l_wrapper stub.
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    // Switch the FPU control word to truncating (round-to-zero) mode
    // for the FISTP, as required by Java narrowing semantics.
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Saw the 0x8000000000000000 sentinel (possible overflow/NaN):
    // redo the conversion in the d2l_wrapper stub for exact semantics.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
// SSE float -> int. CVTTSS2SI truncates; 0x80000000 is its
// overflow/NaN sentinel, which diverts to the d2i_wrapper stub for
// exact Java corner-case semantics.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Sentinel seen: reload the float onto the x87 stack and call the
    // wrapper stub to compute the exact Java result.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 float -> long; result in EDX:EAX, with the 0x80000000:0 sentinel
// check routing overflow/NaN through the d2l_wrapper stub.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
// SSE float -> long: spill the float to the stack, convert on the x87
// stack with a truncating control word, and return the result in
// EDX:EAX. The 0x80000000:0 sentinel routes overflow/NaN to the
// d2l_wrapper stub.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    // Truncating (round-to-zero) control word for the FISTP, as Java
    // narrowing requires.
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Sentinel seen (possible overflow/NaN): redo via the wrapper stub.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// int (stack slot) -> x87 double via FILD; exact, no rounding needed.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// int -> SSE double via CVTSI2SD (scalar conversion path).
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// int -> double via MOVD + packed CVTDQ2PD, selected by UseXmmI2D.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Memory form of int -> x87 double (FILD directly from memory).
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// Only matches when the ConvI2F input is (AndI x 255): the value fits
// in 8 bits, so FILD is exact and no rounding spill is required even
// in 24-bit mode.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst), ClearInstMark);
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F ));
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// int -> float via MOVD + packed CVTDQ2PS, selected by UseXmmI2F.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy to both halves, then arithmetic-shift
// the high half right by 31 to replicate the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// long -> x87 double: push both halves and FILD the 64-bit integer.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// long -> SSE double: convert on the x87 stack, round through memory,
// then load into XMM.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// long -> SSE float: same x87 bounce, storing single precision.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// long -> float to a stack slot (x87 path; F-round via the store).
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// long -> int truncation: just copy the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// MoveF2I: reinterpret float bits as int (no numeric conversion) —
// here a plain 32-bit load from the float's stack slot.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// x87 variant: store the float register to the int stack slot.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// SSE variant: MOVSS the XMM register to the int stack slot.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Register-to-register bit move via MOVD (cheapest form, SSE2+).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src
); 11158 11159 ins_cost(100); 11160 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} 11161 ins_encode %{ 11162 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11163 %} 11164 ins_pipe( ialu_mem_reg ); 11165 %} 11166 11167 11168 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11169 predicate(UseSSE==0); 11170 match(Set dst (MoveI2F src)); 11171 effect(DEF dst, USE src); 11172 11173 ins_cost(125); 11174 format %{ "FLD_S $src\n\t" 11175 "FSTP $dst\t# MoveI2F_stack_reg" %} 11176 opcode(0xD9); /* D9 /0, FLD m32real */ 11177 ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src), 11178 Pop_Reg_FPR(dst), ClearInstMark ); 11179 ins_pipe( fpu_reg_mem ); 11180 %} 11181 11182 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11183 predicate(UseSSE>=1); 11184 match(Set dst (MoveI2F src)); 11185 effect( DEF dst, USE src ); 11186 11187 ins_cost(95); 11188 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11189 ins_encode %{ 11190 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11191 %} 11192 ins_pipe( pipe_slow ); 11193 %} 11194 11195 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11196 predicate(UseSSE>=2); 11197 match(Set dst (MoveI2F src)); 11198 effect( DEF dst, USE src ); 11199 11200 ins_cost(85); 11201 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11202 ins_encode %{ 11203 __ movdl($dst$$XMMRegister, $src$$Register); 11204 %} 11205 ins_pipe( pipe_slow ); 11206 %} 11207 11208 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11209 match(Set dst (MoveD2L src)); 11210 effect(DEF dst, USE src); 11211 11212 ins_cost(250); 11213 format %{ "MOV $dst.lo,$src\n\t" 11214 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11215 opcode(0x8B, 0x8B); 11216 ins_encode( SetInstMark, OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src), ClearInstMark); 11217 ins_pipe( ialu_mem_long_reg ); 11218 %} 11219 11220 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11221 predicate(UseSSE<=1); 11222 match(Set dst (MoveD2L src)); 11223 
effect(DEF dst, USE src); 11224 11225 ins_cost(125); 11226 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11227 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11228 ins_pipe( fpu_mem_reg ); 11229 %} 11230 11231 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11232 predicate(UseSSE>=2); 11233 match(Set dst (MoveD2L src)); 11234 effect(DEF dst, USE src); 11235 ins_cost(95); 11236 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11237 ins_encode %{ 11238 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11239 %} 11240 ins_pipe( pipe_slow ); 11241 %} 11242 11243 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11244 predicate(UseSSE>=2); 11245 match(Set dst (MoveD2L src)); 11246 effect(DEF dst, USE src, TEMP tmp); 11247 ins_cost(85); 11248 format %{ "MOVD $dst.lo,$src\n\t" 11249 "PSHUFLW $tmp,$src,0x4E\n\t" 11250 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11251 ins_encode %{ 11252 __ movdl($dst$$Register, $src$$XMMRegister); 11253 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11254 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11255 %} 11256 ins_pipe( pipe_slow ); 11257 %} 11258 11259 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11260 match(Set dst (MoveL2D src)); 11261 effect(DEF dst, USE src); 11262 11263 ins_cost(200); 11264 format %{ "MOV $dst,$src.lo\n\t" 11265 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11266 opcode(0x89, 0x89); 11267 ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark ); 11268 ins_pipe( ialu_mem_long_reg ); 11269 %} 11270 11271 11272 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11273 predicate(UseSSE<=1); 11274 match(Set dst (MoveL2D src)); 11275 effect(DEF dst, USE src); 11276 ins_cost(125); 11277 11278 format %{ "FLD_D $src\n\t" 11279 "FSTP $dst\t# MoveL2D_stack_reg" %} 11280 opcode(0xDD); /* DD /0, FLD m64real */ 11281 ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src), 11282 Pop_Reg_DPR(dst), 
ClearInstMark ); 11283 ins_pipe( fpu_reg_mem ); 11284 %} 11285 11286 11287 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11288 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 11289 match(Set dst (MoveL2D src)); 11290 effect(DEF dst, USE src); 11291 11292 ins_cost(95); 11293 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11294 ins_encode %{ 11295 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11296 %} 11297 ins_pipe( pipe_slow ); 11298 %} 11299 11300 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11301 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11302 match(Set dst (MoveL2D src)); 11303 effect(DEF dst, USE src); 11304 11305 ins_cost(95); 11306 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11307 ins_encode %{ 11308 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11309 %} 11310 ins_pipe( pipe_slow ); 11311 %} 11312 11313 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11314 predicate(UseSSE>=2); 11315 match(Set dst (MoveL2D src)); 11316 effect(TEMP dst, USE src, TEMP tmp); 11317 ins_cost(85); 11318 format %{ "MOVD $dst,$src.lo\n\t" 11319 "MOVD $tmp,$src.hi\n\t" 11320 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11321 ins_encode %{ 11322 __ movdl($dst$$XMMRegister, $src$$Register); 11323 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11324 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11325 %} 11326 ins_pipe( pipe_slow ); 11327 %} 11328 11329 //----------------------------- CompressBits/ExpandBits ------------------------ 11330 11331 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ 11332 predicate(n->bottom_type()->isa_long()); 11333 match(Set dst (CompressBits src mask)); 11334 effect(TEMP rtmp, TEMP xtmp, KILL cr); 11335 format %{ "compress_bits $dst, $src, $mask\t! 
using $rtmp and $xtmp as TEMP" %} 11336 ins_encode %{ 11337 Label exit, partail_result; 11338 // Parallely extract both upper and lower 32 bits of source into destination register pair. 11339 // Merge the results of upper and lower destination registers such that upper destination 11340 // results are contiguously laid out after the lower destination result. 11341 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 11342 __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); 11343 __ popcntl($rtmp$$Register, $mask$$Register); 11344 // Skip merging if bit count of lower mask register is equal to 32 (register size). 11345 __ cmpl($rtmp$$Register, 32); 11346 __ jccb(Assembler::equal, exit); 11347 // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. 11348 __ movdl($xtmp$$XMMRegister, $rtmp$$Register); 11349 // Shift left the contents of upper destination register by true bit count of lower mask register 11350 // and merge with lower destination register. 11351 __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11352 __ orl($dst$$Register, $rtmp$$Register); 11353 __ movdl($rtmp$$Register, $xtmp$$XMMRegister); 11354 // Zero out upper destination register if true bit count of lower 32 bit mask is zero 11355 // since contents of upper destination have already been copied to lower destination 11356 // register. 11357 __ cmpl($rtmp$$Register, 0); 11358 __ jccb(Assembler::greater, partail_result); 11359 __ movl(HIGH_FROM_LOW($dst$$Register), 0); 11360 __ jmp(exit); 11361 __ bind(partail_result); 11362 // Perform right shift over upper destination register to move out bits already copied 11363 // to lower destination register. 
11364 __ subl($rtmp$$Register, 32); 11365 __ negl($rtmp$$Register); 11366 __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11367 __ bind(exit); 11368 %} 11369 ins_pipe( pipe_slow ); 11370 %} 11371 11372 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ 11373 predicate(n->bottom_type()->isa_long()); 11374 match(Set dst (ExpandBits src mask)); 11375 effect(TEMP rtmp, TEMP xtmp, KILL cr); 11376 format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %} 11377 ins_encode %{ 11378 // Extraction operation sequentially reads the bits from source register starting from LSB 11379 // and lays them out into destination register at bit locations corresponding to true bits 11380 // in mask register. Thus number of source bits read are equal to combined true bit count 11381 // of mask register pair. 11382 Label exit, mask_clipping; 11383 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 11384 __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); 11385 __ popcntl($rtmp$$Register, $mask$$Register); 11386 // If true bit count of lower mask register is 32 then none of bit of lower source register 11387 // will feed to upper destination register. 11388 __ cmpl($rtmp$$Register, 32); 11389 __ jccb(Assembler::equal, exit); 11390 // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. 11391 __ movdl($xtmp$$XMMRegister, $rtmp$$Register); 11392 // Shift right the contents of lower source register to remove already consumed bits. 11393 __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register); 11394 // Extract the bits from lower source register starting from LSB under the influence 11395 // of upper mask register. 
11396 __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register)); 11397 __ movdl($rtmp$$Register, $xtmp$$XMMRegister); 11398 __ subl($rtmp$$Register, 32); 11399 __ negl($rtmp$$Register); 11400 __ movdl($xtmp$$XMMRegister, $mask$$Register); 11401 __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register)); 11402 // Clear the set bits in upper mask register which have been used to extract the contents 11403 // from lower source register. 11404 __ bind(mask_clipping); 11405 __ blsrl($mask$$Register, $mask$$Register); 11406 __ decrementl($rtmp$$Register, 1); 11407 __ jccb(Assembler::greater, mask_clipping); 11408 // Starting from LSB extract the bits from upper source register under the influence of 11409 // remaining set bits in upper mask register. 11410 __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register); 11411 // Merge the partial results extracted from lower and upper source register bits. 11412 __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11413 __ movdl($mask$$Register, $xtmp$$XMMRegister); 11414 __ bind(exit); 11415 %} 11416 ins_pipe( pipe_slow ); 11417 %} 11418 11419 // ======================================================================= 11420 // Fast clearing of an array 11421 // Small non-constant length ClearArray for non-AVX512 targets. 
// ClearArray, small (non-large) case, non-AVX512.  NOTE: the format block
// below is for debug disassembly only; the emitted code comes entirely from
// MacroAssembler::clear_mem() in ins_encode.
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large=false; knoreg: no AVX-512 opmask register on this path.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Small non-constant length ClearArray for AVX512 targets.
// Same as rep_stos, but supplies an AVX-512 mask register (ktmp) to clear_mem().
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large=true selects the no-short-path variant of clear_mem().
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Small constant length ClearArray for AVX512 targets.
// Here cnt is a compile-time immediate ($cnt$$constant), so clear_mem() can
// emit a fully unrolled masked-store sequence.
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// String compare intrinsics.  LL/UU/LU/UL encodes the byte-vs-char layout of
// the two operands.  Fixed registers (EDI/ECX/ESI/EDX/EAX) are required by
// MacroAssembler::string_compare(); _evex variants additionally pass an
// AVX-512 opmask register.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// UL variant: note the operands are passed to string_compare() SWAPPED
// (str2/cnt2 first) -- the macro-assembler implements UL in terms of its
// reversed-operand form.  Register assignments are swapped accordingly.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // First arg false: StrEquals (not ArraysEquals); last bool: byte elements.
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe( pipe_slow );
%}

instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}


// fast search of substring with known size.
// The needle length is a compile-time constant (int_cnt2), letting the
// encoder choose between the unrolled C8 path and the general path.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// string_indexof_conUL: constant-count UL IndexOf; uses the same 8-element constant threshold as the UU form above (LL uses 16).
11867 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11868 eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11869 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11870 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11871 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11872 11873 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %} 11874 ins_encode %{ 11875 int icnt2 = (int)$int_cnt2$$constant; 11876 if (icnt2 >= 8) { 11877 // IndexOf for constant substrings with size >= 8 elements 11878 // which don't need to be loaded through stack. 11879 __ string_indexofC8($str1$$Register, $str2$$Register, 11880 $cnt1$$Register, $cnt2$$Register, 11881 icnt2, $result$$Register, 11882 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11883 } else { 11884 // Small strings are loaded through stack if they cross page boundary.
// Small-needle fallback path (see comment above); same stub entry as the runtime-count variants below, but with a constant icnt2.
11885 __ string_indexof($str1$$Register, $str2$$Register, 11886 $cnt1$$Register, $cnt2$$Register, 11887 icnt2, $result$$Register, 11888 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11889 } 11890 %} 11891 ins_pipe( pipe_slow ); 11892 %} 11893 11894 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11895 eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{ 11896 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11897 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11898 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11899 11900 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11901 ins_encode %{ 11902 __ string_indexof($str1$$Register, $str2$$Register, 11903 $cnt1$$Register, $cnt2$$Register, 11904 (-1), $result$$Register, 11905 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11906 %} 11907 ins_pipe( pipe_slow ); 11908 %} 11909 11910 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11911 eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{ 11912 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11913 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11914 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11915 11916 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11917 ins_encode %{ 11918 __ string_indexof($str1$$Register, $str2$$Register, 11919 $cnt1$$Register, $cnt2$$Register, 11920 (-1), $result$$Register, 11921 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11922 %} 11923 ins_pipe( pipe_slow ); 11924 %} 11925 11926 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11927 eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{ 11928
// Runtime-count UL IndexOf; the (-1) argument below marks the needle length as not a compile-time constant (cf. icnt2 in the *_con variants above).
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11929 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11930 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11931 11932 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11933 ins_encode %{ 11934 __ string_indexof($str1$$Register, $str2$$Register, 11935 $cnt1$$Register, $cnt2$$Register, 11936 (-1), $result$$Register, 11937 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11938 %} 11939 ins_pipe( pipe_slow ); 11940 %} 11941 11942 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, 11943 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ 11944 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); 11945 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); 11946 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); 11947 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} 11948 ins_encode %{ 11949 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, 11950 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); 11951 %} 11952 ins_pipe( pipe_slow ); 11953 %} 11954 11955 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, 11956 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ 11957 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); 11958 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); 11959 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); 11960 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} 11961 ins_encode %{ 11962 __
// Latin-1 single-char IndexOf (StrIntrinsicNode::L), twin of the UTF-16 string_indexof_char above; identical operand/effect lists.
stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, 11963 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); 11964 %} 11965 ins_pipe( pipe_slow ); 11966 %} 11967 11968 11969 // fast array equals 11970 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11971 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11972 %{ 11973 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 11974 match(Set result (AryEq ary1 ary2)); 11975 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11976 //ins_cost(300); 11977 11978 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11979 ins_encode %{ 11980 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 11981 $tmp3$$Register, $result$$Register, $tmp4$$Register, 11982 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg); 11983 %} 11984 ins_pipe( pipe_slow ); 11985 %} 11986 11987 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11988 regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11989 %{ 11990 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 11991 match(Set result (AryEq ary1 ary2)); 11992 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11993 //ins_cost(300); 11994 11995 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11996 ins_encode %{ 11997 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 11998 $tmp3$$Register, $result$$Register, $tmp4$$Register, 11999 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister); 12000 %} 12001 ins_pipe( pipe_slow ); 12002 %} 12003 12004 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12005 regD tmp1, regD tmp2, eCXRegI
// array_equalsC: UU (char[]) AryEq variant -- note 'true /* char */' in the call below vs. 'false' in array_equalsB above.
tmp3, eBXRegI tmp4, eFlagsReg cr) 12006 %{ 12007 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 12008 match(Set result (AryEq ary1 ary2)); 12009 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12010 //ins_cost(300); 12011 12012 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 12013 ins_encode %{ 12014 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 12015 $tmp3$$Register, $result$$Register, $tmp4$$Register, 12016 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg); 12017 %} 12018 ins_pipe( pipe_slow ); 12019 %} 12020 12021 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12022 regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 12023 %{ 12024 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 12025 match(Set result (AryEq ary1 ary2)); 12026 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12027 //ins_cost(300); 12028 12029 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 12030 ins_encode %{ 12031 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 12032 $tmp3$$Register, $result$$Register, $tmp4$$Register, 12033 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister); 12034 %} 12035 ins_pipe( pipe_slow ); 12036 %} 12037 12038 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result, 12039 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) 12040 %{ 12041 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); 12042 match(Set result (CountPositives ary1 len)); 12043 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); 12044 12045 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} 12046 ins_encode %{
// countPositives intrinsic: the _evex twin below requires AVX512VLBW+BMI2 and supplies two mask registers; this form passes knoreg twice.
12047 __ count_positives($ary1$$Register, $len$$Register, 12048 $result$$Register, $tmp3$$Register, 12049 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg); 12050 %} 12051 ins_pipe( pipe_slow ); 12052 %} 12053 12054 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result, 12055 regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr) 12056 %{ 12057 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); 12058 match(Set result (CountPositives ary1 len)); 12059 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); 12060 12061 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} 12062 ins_encode %{ 12063 __ count_positives($ary1$$Register, $len$$Register, 12064 $result$$Register, $tmp3$$Register, 12065 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 12066 %} 12067 ins_pipe( pipe_slow ); 12068 %} 12069 12070 12071 // fast char[] to byte[] compression 12072 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, 12073 regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12074 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); 12075 match(Set result (StrCompressedCopy src (Binary dst len))); 12076 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12077 12078 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} 12079 ins_encode %{ 12080 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, 12081 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12082 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, 12083 knoreg, knoreg); 12084 %} 12085 ins_pipe( pipe_slow ); 12086 %} 12087 12088 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, 12089 regD tmp3, regD tmp4, kReg ktmp1, kReg
// string_compress_evex: masked (AVX-512) form of char[]->byte[] compression; predicate requires both avx512vlbw and bmi2, complementary to the predicate of string_compress above.
ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12090 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); 12091 match(Set result (StrCompressedCopy src (Binary dst len))); 12092 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12093 12094 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} 12095 ins_encode %{ 12096 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, 12097 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12098 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, 12099 $ktmp1$$KRegister, $ktmp2$$KRegister); 12100 %} 12101 ins_pipe( pipe_slow ); 12102 %} 12103 12104 // fast byte[] to char[] inflation 12105 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 12106 regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{ 12107 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); 12108 match(Set dummy (StrInflatedCopy src (Binary dst len))); 12109 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 12110 12111 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 12112 ins_encode %{ 12113 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, 12114 $tmp1$$XMMRegister, $tmp2$$Register, knoreg); 12115 %} 12116 ins_pipe( pipe_slow ); 12117 %} 12118 12119 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 12120 regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{ 12121 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); 12122 match(Set dummy (StrInflatedCopy src (Binary dst len))); 12123 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 12124 12125 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 12126 ins_encode %{ 12127 __ byte_array_inflate($src$$Register, $dst$$Register,
// (continued) remaining byte_array_inflate arguments; the encode_iso_array/encode_ascii_array pair below shares one stub, selecting ASCII vs. ISO-8859-1 via the trailing bool and the is_ascii() predicate split.
$len$$Register, 12128 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister); 12129 %} 12130 ins_pipe( pipe_slow ); 12131 %} 12132 12133 // encode char[] to byte[] in ISO_8859_1 12134 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, 12135 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 12136 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12137 predicate(!((EncodeISOArrayNode*)n)->is_ascii()); 12138 match(Set result (EncodeISOArray src (Binary dst len))); 12139 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12140 12141 format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 12142 ins_encode %{ 12143 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 12144 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12145 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false); 12146 %} 12147 ins_pipe( pipe_slow ); 12148 %} 12149 12150 // encode char[] to byte[] in ASCII 12151 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len, 12152 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 12153 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12154 predicate(((EncodeISOArrayNode*)n)->is_ascii()); 12155 match(Set result (EncodeISOArray src (Binary dst len))); 12156 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12157 12158 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 12159 ins_encode %{ 12160 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 12161 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12162 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true); 12163 %} 12164 ins_pipe( pipe_slow ); 12165 %} 12166 12167 //----------Control Flow Instructions------------------------------------------ 12168 // Signed compare Instructions 12169
// Signed integer compares: opcode 0x3B is CMP r32,r/m32 and 0x85 is TEST r/m32,r32 (used for the compare-to-zero forms), per the inline /* Opcode */ notes.
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ 12170 match(Set cr (CmpI op1 op2)); 12171 effect( DEF cr, USE op1, USE op2 ); 12172 format %{ "CMP $op1,$op2" %} 12173 opcode(0x3B); /* Opcode 3B /r */ 12174 ins_encode( OpcP, RegReg( op1, op2) ); 12175 ins_pipe( ialu_cr_reg_reg ); 12176 %} 12177 12178 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ 12179 match(Set cr (CmpI op1 op2)); 12180 effect( DEF cr, USE op1 ); 12181 format %{ "CMP $op1,$op2" %} 12182 opcode(0x81,0x07); /* Opcode 81 /7 */ 12183 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ 12184 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 12185 ins_pipe( ialu_cr_reg_imm ); 12186 %} 12187 12188 // Cisc-spilled version of cmpI_eReg 12189 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ 12190 match(Set cr (CmpI op1 (LoadI op2))); 12191 12192 format %{ "CMP $op1,$op2" %} 12193 ins_cost(500); 12194 opcode(0x3B); /* Opcode 3B /r */ 12195 ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark ); 12196 ins_pipe( ialu_cr_reg_mem ); 12197 %} 12198 12199 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{ 12200 match(Set cr (CmpI src zero)); 12201 effect( DEF cr, USE src ); 12202 12203 format %{ "TEST $src,$src" %} 12204 opcode(0x85); 12205 ins_encode( OpcP, RegReg( src, src ) ); 12206 ins_pipe( ialu_cr_reg_imm ); 12207 %} 12208 12209 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{ 12210 match(Set cr (CmpI (AndI src con) zero)); 12211 12212 format %{ "TEST $src,$con" %} 12213 opcode(0xF7,0x00); 12214 ins_encode( OpcP, RegOpc(src), Con32(con) ); 12215 ins_pipe( ialu_cr_reg_imm ); 12216 %} 12217 12218 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{ 12219 match(Set cr (CmpI (AndI src mem) zero)); 12220 12221 format %{ "TEST $src,$mem" %} 12222 opcode(0x85); 12223 ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark ); 12224 ins_pipe( ialu_cr_reg_mem ); 12225 %} 12226 12227 // Unsigned
// Unsigned forms are encoded identically to the signed ones; they differ only in producing eFlagsRegU (see the split comment spanning this line break).
compare Instructions; really, same as signed except they 12228 // produce an eFlagsRegU instead of eFlagsReg. 12229 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{ 12230 match(Set cr (CmpU op1 op2)); 12231 12232 format %{ "CMPu $op1,$op2" %} 12233 opcode(0x3B); /* Opcode 3B /r */ 12234 ins_encode( OpcP, RegReg( op1, op2) ); 12235 ins_pipe( ialu_cr_reg_reg ); 12236 %} 12237 12238 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{ 12239 match(Set cr (CmpU op1 op2)); 12240 12241 format %{ "CMPu $op1,$op2" %} 12242 opcode(0x81,0x07); /* Opcode 81 /7 */ 12243 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 12244 ins_pipe( ialu_cr_reg_imm ); 12245 %} 12246 12247 // // Cisc-spilled version of cmpU_eReg 12248 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{ 12249 match(Set cr (CmpU op1 (LoadI op2))); 12250 12251 format %{ "CMPu $op1,$op2" %} 12252 ins_cost(500); 12253 opcode(0x3B); /* Opcode 3B /r */ 12254 ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark ); 12255 ins_pipe( ialu_cr_reg_mem ); 12256 %} 12257 12258 // // Cisc-spilled version of cmpU_eReg 12259 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{ 12260 // match(Set cr (CmpU (LoadI op1) op2)); 12261 // 12262 // format %{ "CMPu $op1,$op2" %} 12263 // ins_cost(500); 12264 // opcode(0x39); /* Opcode 39 /r */ 12265 // ins_encode( OpcP, RegMem( op1, op2) ); 12266 //%} 12267 12268 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{ 12269 match(Set cr (CmpU src zero)); 12270 12271 format %{ "TESTu $src,$src" %} 12272 opcode(0x85); 12273 ins_encode( OpcP, RegReg( src, src ) ); 12274 ins_pipe( ialu_cr_reg_imm ); 12275 %} 12276 12277 // Unsigned pointer compare Instructions 12278 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{ 12279 match(Set cr (CmpP op1 op2)); 12280 12281 format %{ "CMPu $op1,$op2" %} 12282 opcode(0x3B); /* Opcode 3B /r */ 12283 ins_encode( OpcP, RegReg( op1, op2) ); 12284 ins_pipe( ialu_cr_reg_reg ); 12285
// Pointer compares reuse the unsigned encodings -- CmpP is already unsigned (see the 'Yanked all unsigned pointer compare operations' note below).
%} 12286 12287 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{ 12288 match(Set cr (CmpP op1 op2)); 12289 12290 format %{ "CMPu $op1,$op2" %} 12291 opcode(0x81,0x07); /* Opcode 81 /7 */ 12292 ins_encode( SetInstMark, OpcSErm( op1, op2 ), Con8or32( op2 ), ClearInstMark ); 12293 ins_pipe( ialu_cr_reg_imm ); 12294 %} 12295 12296 // // Cisc-spilled version of cmpP_eReg 12297 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{ 12298 match(Set cr (CmpP op1 (LoadP op2))); 12299 12300 format %{ "CMPu $op1,$op2" %} 12301 ins_cost(500); 12302 opcode(0x3B); /* Opcode 3B /r */ 12303 ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark ); 12304 ins_pipe( ialu_cr_reg_mem ); 12305 %} 12306 12307 // // Cisc-spilled version of cmpP_eReg 12308 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{ 12309 // match(Set cr (CmpP (LoadP op1) op2)); 12310 // 12311 // format %{ "CMPu $op1,$op2" %} 12312 // ins_cost(500); 12313 // opcode(0x39); /* Opcode 39 /r */ 12314 // ins_encode( OpcP, RegMem( op1, op2) ); 12315 //%} 12316 12317 // Compare raw pointer (used in out-of-heap check). 12318 // Only works because non-oop pointers must be raw pointers 12319 // and raw pointers have no anti-dependencies. 12320 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{ 12321 predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none ); 12322 match(Set cr (CmpP op1 (LoadP op2))); 12323 12324 format %{ "CMPu $op1,$op2" %} 12325 opcode(0x3B); /* Opcode 3B /r */ 12326 ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark ); 12327 ins_pipe( ialu_cr_reg_mem ); 12328 %} 12329 12330 // 12331 // This will generate a signed flags result. This should be ok 12332 // since any compare to a zero should be eq/neq.
// TEST reg,reg yields signed flags (eFlagsReg); safe because a pointer compare against zero is only consumed as eq/ne (see comment above).
12333 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{ 12334 match(Set cr (CmpP src zero)); 12335 12336 format %{ "TEST $src,$src" %} 12337 opcode(0x85); 12338 ins_encode( OpcP, RegReg( src, src ) ); 12339 ins_pipe( ialu_cr_reg_imm ); 12340 %} 12341 12342 // Cisc-spilled version of testP_reg 12343 // This will generate a signed flags result. This should be ok 12344 // since any compare to a zero should be eq/neq. 12345 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{ 12346 match(Set cr (CmpP (LoadP op) zero)); 12347 12348 format %{ "TEST $op,0xFFFFFFFF" %} 12349 ins_cost(500); 12350 opcode(0xF7); /* Opcode F7 /0 */ 12351 ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF), ClearInstMark ); 12352 ins_pipe( ialu_cr_reg_imm ); 12353 %} 12354 12355 // Yanked all unsigned pointer compare operations. 12356 // Pointer compares are done with CmpP which is already unsigned. 12357 12358 //----------Max and Min-------------------------------------------------------- 12359 // Min Instructions 12360 //// 12361 // *** Min and Max using the conditional move are slower than the 12362 // *** branch version on a Pentium III. 12363 // // Conditional move for min 12364 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 12365 // effect( USE_DEF op2, USE op1, USE cr ); 12366 // format %{ "CMOVlt $op2,$op1\t!
// (The cmov-based min forms above/below are retained only as commented-out history: per the note, slower than the branch version on a Pentium III.)
min" %} 12367 // opcode(0x4C,0x0F); 12368 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 12369 // ins_pipe( pipe_cmov_reg ); 12370 //%} 12371 // 12372 //// Min Register with Register (P6 version) 12373 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ 12374 // predicate(VM_Version::supports_cmov() ); 12375 // match(Set op2 (MinI op1 op2)); 12376 // ins_cost(200); 12377 // expand %{ 12378 // eFlagsReg cr; 12379 // compI_eReg(cr,op1,op2); 12380 // cmovI_reg_lt(op2,op1,cr); 12381 // %} 12382 //%} 12383 12384 // Min Register with Register (generic version) 12385 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 12386 match(Set dst (MinI dst src)); 12387 effect(KILL flags); 12388 ins_cost(300); 12389 12390 format %{ "MIN $dst,$src" %} 12391 opcode(0xCC); 12392 ins_encode( min_enc(dst,src) ); 12393 ins_pipe( pipe_slow ); 12394 %} 12395 12396 // Max Register with Register 12397 // *** Min and Max using the conditional move are slower than the 12398 // *** branch version on a Pentium III. 12399 // // Conditional move for max 12400 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 12401 // effect( USE_DEF op2, USE op1, USE cr ); 12402 // format %{ "CMOVgt $op2,$op1\t!
// maxI_eReg mirrors minI_eReg above; both defer to the min_enc/max_enc encodings (defined elsewhere in this file, not visible in this chunk).
max" %} 12403 // opcode(0x4F,0x0F); 12404 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 12405 // ins_pipe( pipe_cmov_reg ); 12406 //%} 12407 // 12408 // // Max Register with Register (P6 version) 12409 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ 12410 // predicate(VM_Version::supports_cmov() ); 12411 // match(Set op2 (MaxI op1 op2)); 12412 // ins_cost(200); 12413 // expand %{ 12414 // eFlagsReg cr; 12415 // compI_eReg(cr,op1,op2); 12416 // cmovI_reg_gt(op2,op1,cr); 12417 // %} 12418 //%} 12419 12420 // Max Register with Register (generic version) 12421 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 12422 match(Set dst (MaxI dst src)); 12423 effect(KILL flags); 12424 ins_cost(300); 12425 12426 format %{ "MAX $dst,$src" %} 12427 opcode(0xCC); 12428 ins_encode( max_enc(dst,src) ); 12429 ins_pipe( pipe_slow ); 12430 %} 12431 12432 // ============================================================================ 12433 // Counted Loop limit node which represents exact final iterator value. 12434 // Note: the resulting value should fit into integer range since 12435 // counted loops have limit check on overflow. 12436 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{ 12437 match(Set limit (LoopLimit (Binary init limit) stride)); 12438 effect(TEMP limit_hi, TEMP tmp, KILL flags); 12439 ins_cost(300); 12440 12441 format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %} 12442 ins_encode %{ 12443 int strd = (int)$stride$$constant; 12444 assert(strd != 1 && strd != -1, "sanity"); 12445 int m1 = (strd > 0) ?
// NOTE(review): local 'm1' computed here is never referenced in the remainder of this encoding -- candidate for removal in a code change.
1 : -1; 12446 // Convert limit to long (EAX:EDX) 12447 __ cdql(); 12448 // Convert init to long (init:tmp) 12449 __ movl($tmp$$Register, $init$$Register); 12450 __ sarl($tmp$$Register, 31); 12451 // $limit - $init 12452 __ subl($limit$$Register, $init$$Register); 12453 __ sbbl($limit_hi$$Register, $tmp$$Register); 12454 // + ($stride - 1) 12455 if (strd > 0) { 12456 __ addl($limit$$Register, (strd - 1)); 12457 __ adcl($limit_hi$$Register, 0); 12458 __ movl($tmp$$Register, strd); 12459 } else { 12460 __ addl($limit$$Register, (strd + 1)); 12461 __ adcl($limit_hi$$Register, -1); 12462 __ lneg($limit_hi$$Register, $limit$$Register); 12463 __ movl($tmp$$Register, -strd); 12464 } 12465 // signed division: (EAX:EDX) / pos_stride 12466 __ idivl($tmp$$Register); 12467 if (strd < 0) { 12468 // restore sign 12469 __ negl($tmp$$Register); 12470 } 12471 // (EAX) * stride 12472 __ mull($tmp$$Register); 12473 // + init (ignore upper bits) 12474 __ addl($limit$$Register, $init$$Register); 12475 %} 12476 ins_pipe( pipe_slow ); 12477 %} 12478 12479 // ============================================================================ 12480 // Branch Instructions 12481 // Jump Table 12482 instruct jumpXtnd(rRegI switch_val) %{ 12483 match(Jump switch_val); 12484 ins_cost(350); 12485 format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %} 12486 ins_encode %{ 12487 // Jump to Address(table_base + switch_reg) 12488 Address index(noreg, $switch_val$$Register, Address::times_1); 12489 __ jump(ArrayAddress($constantaddress, index), noreg); 12490 %} 12491 ins_pipe(pipe_jmp); 12492 %} 12493 12494 // Jump Direct - Label defines a relative address from JMP+1 12495 instruct jmpDir(label labl) %{ 12496 match(Goto); 12497 effect(USE labl); 12498 12499 ins_cost(300); 12500 format %{ "JMP $labl" %} 12501 size(5); 12502 ins_encode %{ 12503 Label* L = $labl$$label; 12504 __ jmp(*L, false); // Always long jump 12505 %} 12506 ins_pipe( pipe_jmp ); 12507 %} 12508 12509 // Jump Direct Conditional - Label
defines a relative address from Jcc+1 12510 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{ 12511 match(If cop cr); 12512 effect(USE labl); 12513 12514 ins_cost(300); 12515 format %{ "J$cop $labl" %} 12516 size(6); 12517 ins_encode %{ 12518 Label* L = $labl$$label; 12519 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12520 %} 12521 ins_pipe( pipe_jcc ); 12522 %} 12523 12524 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12525 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{ 12526 match(CountedLoopEnd cop cr); 12527 effect(USE labl); 12528 12529 ins_cost(300); 12530 format %{ "J$cop $labl\t# Loop end" %} 12531 size(6); 12532 ins_encode %{ 12533 Label* L = $labl$$label; 12534 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12535 %} 12536 ins_pipe( pipe_jcc ); 12537 %} 12538 12539 // Jump Direct Conditional - using unsigned comparison 12540 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12541 match(If cop cmp); 12542 effect(USE labl); 12543 12544 ins_cost(300); 12545 format %{ "J$cop,u $labl" %} 12546 size(6); 12547 ins_encode %{ 12548 Label* L = $labl$$label; 12549 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12550 %} 12551 ins_pipe(pipe_jcc); 12552 %} 12553 12554 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12555 match(If cop cmp); 12556 effect(USE labl); 12557 12558 ins_cost(200); 12559 format %{ "J$cop,u $labl" %} 12560 size(6); 12561 ins_encode %{ 12562 Label* L = $labl$$label; 12563 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12564 %} 12565 ins_pipe(pipe_jcc); 12566 %} 12567 12568 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 12569 match(If cop cmp); 12570 effect(USE labl); 12571 12572 ins_cost(200); 12573 format %{ $$template 12574 if ($cop$$cmpcode == Assembler::notEqual) { 12575 $$emit$$"JP,u $labl\n\t" 12576 $$emit$$"J$cop,u $labl" 
// (tail of jmpConUCF2: format template and encoding for the unordered-aware
// conditional branch; the instruct header is above this chunk)
  } else {
    $$emit$$"JP,u done\n\t"
    $$emit$$"J$cop,u $labl\n\t"
    $$emit$$"done:"
  }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // NE case: an unordered result (PF set) also counts as not-equal,
      // so both the parity branch and the NE branch go to the target.
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // EQ case: unordered must NOT be treated as equal — skip over the
      // equal branch when PF is set.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
// array for an instance of the superklass.  Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()).  Return
// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
//
// Result register is pinned to EDI, sub to ESI and super to EAX because the
// REPNE SCASD idiom in enc_PartialSubtypeCheck implicitly uses EDI/EAX/ECX.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Same check, but matched directly under a compare-against-zero so only the
// flags result is needed; EDI need not be cleared on a hit (opcode 0x0).
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx,
                                      eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset.  These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching.  Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.
// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);  // 2-byte opcode + rel8 displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Same as jmpConU_short, but for the "unordered combined flags" register
// class (flags produced by a float compare that may set PF).
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short-branch variant of jmpConUCF2: EQ/NE tests that must also account for
// the parity (unordered) flag, emitted as two short branches (size 4).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);  // two 2-byte short branches
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // Unordered (PF) counts as not-equal: branch on either condition.
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Unordered must not be taken as equal: skip the equal branch on PF.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers.  Comparing such values efficiently
// is tricky.  The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test.  The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
// difference between 'Y' and '0L'.
// The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code.  The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf.  One fix is to not match both
// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    // dst := 0; then bump to +1 or drop to -1 based on the compare outcome.
    __ xorptr($dst$$Register, $dst$$Register);
    // High halves decide with a SIGNED compare unless they are equal.
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // High halves equal: low halves compare UNSIGNED (below, not less).
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Against zero only the sign of the high word matters: TEST hi,hi.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// CMP on the low words, then SBB on the high words: the final sign/overflow
// flags reflect the full 64-bit signed comparison; a temp is clobbered.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only LT/GE are valid against flags produced by the LTGE compares above.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Same CMP/SBB idiom as the signed version; the unsigned result is read
// through the carry flag by the consumers of flagsReg_ulong_LTGE.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  // Restricted to LT/GE: these are the only tests the LTGE flag producers
  // manifest correctly.  Requires hardware CMOV support.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant: conditionally loads both halves of the long.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-compare flavor: same encoding, expands to the signed-named rule.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LTGE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// (x87 register flavor; only when SSE2 is not used for doubles)
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// (XMM register flavor; requires SSE2)
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 flavor, no SSE)
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM flavor, SSE1+)
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// Zero test: OR the two halves into a temp; ZF reflects (lo|hi) == 0.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// Compare low halves; only if they are equal compare the high halves too.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only EQ/NE are valid against flags produced by the EQNE compares above.
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-compare flavors: same encoding, expand to the signed-named rules.
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_EQNE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_EQNE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_EQNE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 flavor)
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM flavor)
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
// (computes 0 - src so the commuted LE/GT test can be applied).
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  // NOTE(review): this format spells the high half "$src.hi+4" while the
  // LTGE/EQNE mem variants spell it "$src.hi" — cosmetic (debug-print only)
  // inconsistency; the encodings are identical.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-compare flavors: same encoding, expand to the signed-named rules.
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
// Conditionally move an int register; condition comes from a long compare
// whose Bool test is LE or GT.  Requires CMOV hardware support.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source variant of the rule above.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags variants: same encodings, expanded from the rules above.
instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LEGT(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 double register, UseSSE<=1).
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM double register, UseSSE>=2).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 float register, UseSSE==0).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM float register, UseSSE>=1).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
13474 instruct CallDynamicJavaDirect(method meth) %{ 13475 match(CallDynamicJava); 13476 effect(USE meth); 13477 13478 ins_cost(300); 13479 format %{ "MOV EAX,(oop)-1\n\t" 13480 "CALL,dynamic" %} 13481 opcode(0xE8); /* E8 cd */ 13482 ins_encode( pre_call_resets, 13483 Java_Dynamic_Call( meth ), 13484 call_epilog, 13485 post_call_FPU ); 13486 ins_pipe( pipe_slow ); 13487 ins_alignment(4); 13488 %} 13489 13490 // Call Runtime Instruction 13491 instruct CallRuntimeDirect(method meth) %{ 13492 match(CallRuntime ); 13493 effect(USE meth); 13494 13495 ins_cost(300); 13496 format %{ "CALL,runtime " %} 13497 opcode(0xE8); /* E8 cd */ 13498 // Use FFREEs to clear entries in float stack 13499 ins_encode( pre_call_resets, 13500 FFree_Float_Stack_All, 13501 Java_To_Runtime( meth ), 13502 post_call_FPU ); 13503 ins_pipe( pipe_slow ); 13504 %} 13505 13506 // Call runtime without safepoint 13507 instruct CallLeafDirect(method meth) %{ 13508 match(CallLeaf); 13509 effect(USE meth); 13510 13511 ins_cost(300); 13512 format %{ "CALL_LEAF,runtime " %} 13513 opcode(0xE8); /* E8 cd */ 13514 ins_encode( pre_call_resets, 13515 FFree_Float_Stack_All, 13516 Java_To_Runtime( meth ), 13517 Verify_FPU_For_Leaf, post_call_FPU ); 13518 ins_pipe( pipe_slow ); 13519 %} 13520 13521 instruct CallLeafNoFPDirect(method meth) %{ 13522 match(CallLeafNoFP); 13523 effect(USE meth); 13524 13525 ins_cost(300); 13526 format %{ "CALL_LEAF_NOFP,runtime " %} 13527 opcode(0xE8); /* E8 cd */ 13528 ins_encode(pre_call_resets, Java_To_Runtime(meth)); 13529 ins_pipe( pipe_slow ); 13530 %} 13531 13532 13533 // Return Instruction 13534 // Remove the return address & jump to it. 13535 instruct Ret() %{ 13536 match(Return); 13537 format %{ "RET" %} 13538 opcode(0xC3); 13539 ins_encode(OpcP); 13540 ins_pipe( pipe_jmp ); 13541 %} 13542 13543 // Tail Call; Jump from runtime stub to Java code. 13544 // Also known as an 'interprocedural jump'. 13545 // Target of jump will eventually return to caller. 
13546 // TailJump below removes the return address. 13547 // Don't use ebp for 'jump_target' because a MachEpilogNode has already been 13548 // emitted just above the TailCall which has reset ebp to the caller state. 13549 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{ 13550 match(TailCall jump_target method_ptr); 13551 ins_cost(300); 13552 format %{ "JMP $jump_target \t# EBX holds method" %} 13553 opcode(0xFF, 0x4); /* Opcode FF /4 */ 13554 ins_encode( OpcP, RegOpc(jump_target) ); 13555 ins_pipe( pipe_jmp ); 13556 %} 13557 13558 13559 // Tail Jump; remove the return address; jump to target. 13560 // TailCall above leaves the return address around. 13561 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{ 13562 match( TailJump jump_target ex_oop ); 13563 ins_cost(300); 13564 format %{ "POP EDX\t# pop return address into dummy\n\t" 13565 "JMP $jump_target " %} 13566 opcode(0xFF, 0x4); /* Opcode FF /4 */ 13567 ins_encode( enc_pop_rdx, 13568 OpcP, RegOpc(jump_target) ); 13569 ins_pipe( pipe_jmp ); 13570 %} 13571 13572 // Forward exception. 13573 instruct ForwardExceptionjmp() 13574 %{ 13575 match(ForwardException); 13576 13577 format %{ "JMP forward_exception_stub" %} 13578 ins_encode %{ 13579 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg); 13580 %} 13581 ins_pipe(pipe_jmp); 13582 %} 13583 13584 // Create exception oop: created by stack-crawling runtime code. 13585 // Created exception is now available to this handler, and is setup 13586 // just prior to jumping to this handler. No code emitted. 13587 instruct CreateException( eAXRegP ex_oop ) 13588 %{ 13589 match(Set ex_oop (CreateEx)); 13590 13591 size(0); 13592 // use the following format syntax 13593 format %{ "# exception oop is in EAX; no code emitted" %} 13594 ins_encode(); 13595 ins_pipe( empty ); 13596 %} 13597 13598 13599 // Rethrow exception: 13600 // The exception oop will come in the first argument position. 
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Inline fast-path monitor enter; sets flags for the slow-path decision.
// Used when the legacy (non-lightweight) locking mode is active.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    // On 32-bit x86 the current thread is not in a fixed register;
    // materialize it before calling into the macro assembler.
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, $thread$$Register, nullptr);
  %}
  ins_pipe(pipe_slow);
%}

// Inline fast-path monitor exit for the legacy locking mode.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Lightweight-locking fast-path monitor enter (LockingMode == LM_LIGHTWEIGHT).
instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Lightweight-locking fast-path monitor exit.
instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object eax_reg));
  effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast a long value into an AVX-512 mask register, mask length <= 32.
// NOTE(review): the instruct name says LT32 but the predicate is <= 32 and
// the format text says LE32 — confirm the intended name; they should agree.
instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// Mask length > 32: needs a temporary mask register for the upper half.
instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// Same as above with an int source register.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Safepoint Instruction
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    // Attach the poll relocation to this instruction, then emit
    // TEST eax, [poll] — a 2-byte encoding (opcode 0x85) as long as
    // the base register is not EBP (hence eRegP_no_EBP above).
    __ set_inst_mark();
    __ relocate(relocInfo::poll_type);
    __ clear_inst_mark();
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Active rule: a load that immediately follows a store of the same
// register to the same address is redundant; replace the pair with
// just the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.