//
// Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// ESP is the stack pointer: never allocated, never saved (NS/NS).
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
// Each FPR is described as an L (low) / H (high) pair of 32-bit halves
// sharing one encoding, so a double occupies FPRnL:FPRnH.
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
reg_class ebpd_reg( EBP,EDI );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define   RELOC_IMM32    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

#define __ masm->

// How to find the high register of a Long pair, given the low register
// Per the pairing above (EDX:EAX, EBX:ECX, EDI:EBP) the high register's
// encoding is always the low register's encoding plus 2.
#define HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
#define HIGH_FROM_LOW_ENC(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.

void reg_mask_init() {}

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Helper that stores a 128-bit (lo, hi) pair at the first 16-byte-aligned
// address at or below 'adr', and returns that aligned address.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
// Sized 5*128 bits: 4 mask entries plus one extra 128-bit slot so that
// double_quadword() can round each &fp_signmask_pool[i*2] down to a
// 16-byte boundary and still stay inside the array.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Extra bytes emitted in front of a call when the method runs in 24-bit
// FP mode (FLDCW) and/or AVX is present (VZEROUPPER); every ret_addr_offset
// below must account for these so the return address is computed correctly.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  // 5 = one opcode byte + 32-bit displacement (CALL rel32).
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  // 10 = 5-byte MOV (inline-cache load) + 5-byte CALL; see
  // CallDynamicJavaDirectNode::compute_padding below.
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}

// Size of the FFree_Float_Stack_All sequence; filled in at emission time
// (asserted below to have been set before it is used).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModRM (or SIB) byte: mod/reg-opcode/r-m packed as 2|3|3 bits.
void emit_rm(C2_MacroAssembler *masm, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  __ emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte formed from a base opcode OR'd with a condition code.
void emit_cc(C2_MacroAssembler *masm, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  __ emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(C2_MacroAssembler *masm, int code) {
  __ emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(C2_MacroAssembler *masm, int code, relocInfo::relocType reloc, int offset = 0) {
  __ relocate(__ inst_mark() + offset, reloc);
  emit_opcode(masm, code);
}

// EMIT_D8()
void emit_d8(C2_MacroAssembler *masm, int d8) {
  __ emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(C2_MacroAssembler *masm, int d16) {
  __ emit_int16(d16);
}

// EMIT_D32()
void emit_d32(C2_MacroAssembler *masm, int d32) {
  __ emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(C2_MacroAssembler *masm, int d32, relocInfo::relocType reloc,
        int format) {
  __ relocate(__ inst_mark(), reloc, format);
  __ emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(C2_MacroAssembler *masm, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Any non-null, non-sentinel value embedded with an oop relocation must
  // actually be a valid oop.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  __ relocate(__ inst_mark(), rspec, format);
  __ emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + [ESP+disp] addressing, choosing the 8-bit displacement
// form when disp fits in a signed byte.
void store_to_stackslot(C2_MacroAssembler *masm, int opcode, int rm_field, int disp) {
  emit_opcode( masm, opcode );               // (e.g., FILD   [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( masm, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (masm, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( masm, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(masm, disp);     // Displacement  // R/M byte
  }
}

   // rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModRM/SIB/displacement bytes for a register-memory operand.
// index == 0x4 means "no index" (ESP cannot be an index register); the
// mod field selects no/8-bit/32-bit displacement per the x86 encoding rules.
void encode_RegMem( C2_MacroAssembler *masm, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(masm, 0x0, reg_encoding, base);
    }
    else {    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(masm, 0x1, reg_encoding, base);
        emit_d8(masm, displace);
      }
      else {            // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(masm, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(masm, displace, disp_reloc, 1);
          } else {
            emit_d32      (masm, displace);
          }
        }
        else {            // Normal base + offset
          emit_rm(masm, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(masm, displace, disp_reloc, 1);
          } else {
            emit_d32      (masm, displace);
          }
        }
      }
    }
  }
  else {    // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(masm, 0x0, reg_encoding, 0x4);
      emit_rm(masm, scale, index, base);
    }
    else {    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(masm, 0x1, reg_encoding, 0x4);
        emit_rm(masm, scale, index, base);
        emit_d8(masm, displace);
      }
      else {            // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(masm, 0x2, reg_encoding, 0x4);
          emit_rm(masm, scale, index, 0x04);
        } else {
          emit_rm(masm, 0x2, reg_encoding, 0x4);
          emit_rm(masm, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(masm, displace, disp_reloc, 1);
        } else {
          emit_d32      (masm, displace);
        }
      }
    }
  }
}


// Emit a reg-to-reg MOV (opcode 0x8B); a self-move is elided entirely.
void encode_Copy( C2_MacroAssembler *masm, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, dst_encoding, src_encoding );
  }
}

void emit_cmpfp_fixup(MacroAssembler* masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-valued FP compare result in 'dst':
// -1 (below or unordered), 0 (equal), 1 (above).
static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Debug-only pretty-printer for the method prolog; must mirror the code
// emitted by MachPrologNode::emit / verified_entry below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH   EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB    ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB    ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD    EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW  \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  // verified_entry emits the whole prolog: stack bang (if needed), EBP save,
  // frame allocation, and the 24-bit FPU control-word load.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Debug-only pretty-printer for the method epilog; must mirror
// MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW  standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL   EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL    rsp, poll_offset[thread]  \n\t"
              "JA      #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  // ADD ESP, #framesize: imm8 form (0x83) only covers 0..127, so use the
  // imm32 form (0x81) for larger frames.
  if (framesize >= 128) {
    emit_opcode(masm, 0x81); // add  SP, #framesize
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d32(masm, framesize);
  } else if (framesize) {
    emit_opcode(masm, 0x83); // add  SP, #framesize
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, framesize);
  }

  emit_opcode(masm, 0x58 | EBP_enc); // POP EBP (0x58 + register encoding)

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ set_inst_mark();
    __ relocate(relocInfo::poll_return_type);
    __ clear_inst_mark();
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//============================================================================= 733 734 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack }; 735 static enum RC rc_class( OptoReg::Name reg ) { 736 737 if( !OptoReg::is_valid(reg) ) return rc_bad; 738 if (OptoReg::is_stack(reg)) return rc_stack; 739 740 VMReg r = OptoReg::as_VMReg(reg); 741 if (r->is_Register()) return rc_int; 742 if (r->is_FloatRegister()) { 743 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 744 return rc_float; 745 } 746 if (r->is_KRegister()) return rc_kreg; 747 assert(r->is_XMMRegister(), "must be"); 748 return rc_xmm; 749 } 750 751 static int impl_helper( C2_MacroAssembler *masm, bool do_size, bool is_load, int offset, int reg, 752 int opcode, const char *op_str, int size, outputStream* st ) { 753 if( masm ) { 754 masm->set_inst_mark(); 755 emit_opcode (masm, opcode ); 756 encode_RegMem(masm, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); 757 masm->clear_inst_mark(); 758 #ifndef PRODUCT 759 } else if( !do_size ) { 760 if( size != 0 ) st->print("\n\t"); 761 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 762 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 763 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 764 } else { // FLD, FST, PUSH, POP 765 st->print("%s [ESP + #%d]",op_str,offset); 766 } 767 #endif 768 } 769 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 770 return size+3+offset_size; 771 } 772 773 // Helper for XMM registers. Extra opcode bits, limited syntax. 
// Spill/reload an XMM register to/from a stack slot (movdbl for an adjacent
// register pair, i.e. a double; movflt for a single float).  Same three-mode
// protocol as impl_helper(); returns the accumulated size in bytes.  The size
// math below must mirror the assembler's actual encoding, including the
// EVEX compressed-disp8 rule when UseAVX > 2.
static int impl_x_helper( C2_MacroAssembler *masm, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (masm) {
    // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    __ set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Size accounting: with EVEX (UseAVX > 2) the displacement may compress to
  // one byte even when > 127 (disp8*N scaling); otherwise the plain rule
  // 0/1/4 bytes applies.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy (movdbl for an adjacent pair/double, movflt for a
// single float).  Returns the accumulated size; see the prefix-size notes at
// the bottom.
static int impl_movx_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (masm) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Copy a 32-bit general-purpose register into an XMM register via MOVD.
static int impl_movgpr2x_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (masm) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4; // EVEX adds two prefix bytes
}


// Copy an XMM register into a 32-bit general-purpose register via MOVD.
static int impl_movx2gpr_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (masm) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4; // EVEX adds two prefix bytes
}

// Integer register-to-register copy: MOV r32, r/m32 (0x8B) — 2 bytes.
static int impl_mov_helper( C2_MacroAssembler *masm, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( masm ) {
    emit_opcode(masm, 0x8B );
    emit_rm    (masm, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 float register to a stack slot.  If the source is not already
// on top of the FP stack it is FLDed first (2 bytes) and then stored with the
// popping form (FSTP); otherwise the non-popping FST is used.
static int impl_fp_store_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( masm ) {
      emit_opcode( masm, 0xD9 );  // FLD (i.e., push it)
      emit_d8( masm, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // st_op reuses register numbers as the x87 /digit opcode extension passed
  // to impl_helper (presumably EBX_num encodes FSTP, EDX_num FST — the
  // encoding goes through Matcher::_regEncode there; verify against reg_defs).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(masm,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st);

void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                      int stack_offset, int reg, uint ireg, outputStream* st);

// Copy a vector between two stack slots.  VecS/VecD go through push/pop pairs;
// wider vectors bounce through xmm0, which is saved to and restored from
// scratch memory below ESP.
static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (masm) {
    switch (ireg) {
    case Op_VecS: // 32-bit
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD: // 64-bit: two 32-bit push/pop pairs
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX: // 128-bit via xmm0
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY: // 256-bit via xmm0
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ: // 512-bit via xmm0
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      // NOTE: the text says "popq" though the instruction emitted above is popl.
      st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushl [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu xmm0, [rsp + #%d]\n\t"
                "movdqu [rsp + #%d], xmm0\n\t"
                "movdqu xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      // NOTE: text says "vmovdqu" though the instruction emitted is evmovdquq.
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}

// Generate the code (masm != nullptr), disassembly text (masm == nullptr,
// !do_size) or size (masm == nullptr, do_size) for one spill copy, dispatching
// on the register classes of the first (and, for 64-bit values, second) halves
// of source and destination.  The returned size must match the emitted bytes.
uint MachSpillCopyNode::implementation( C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // Vector spill copies (except vector masks) are handled by dedicated
  // helpers shared with the 64-bit VM; they do not report a size (return 0).
  if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }

  // --------------------------------------
  // Check for mem-mem move. push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high half first so it is not clobbered by the low-half copy.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(masm,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( src_first_rc == rc_stack && dst_first_rc == rc_int )
    size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( masm ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (masm, 0xD9 );           // FLD    ST(i)
        emit_d8      (masm, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (masm, 0xDD );           // FSTP   ST(i)
        emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode  (masm, 0xDD );           // FST    ST(i)
        emit_d8      (masm, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                         st->print("FST %s",Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( masm ) {
      masm->set_inst_mark();
      emit_opcode  (masm, op );
      encode_RegMem(masm, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (masm, 0xDD );           // FSTP   ST(i)
      emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
      masm->clear_inst_mark();
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(masm,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
    return impl_x_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( masm ) {
      emit_opcode(masm,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(masm, 0x1, ESP_enc, 0x04);
      emit_rm(masm, 0x0, 0x04, ESP_enc);
      emit_d8(masm,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    // Store the x87 value into the 8-byte scratch area just created ...
    size = impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(masm,do_size,true ,0,dst_first, dst_second, size, st);

    // ... then release the scratch area, again via flag-preserving LEA.
    if( masm ) {
      emit_opcode(masm,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(masm, 0x1, ESP_enc, 0x04);
      emit_rm(masm, 0x0, 0x04, ESP_enc);
      emit_d8(masm,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // AVX-512 opmask specific spilling.
  if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    int offset = ra_->reg2offset(src_first);
    if (masm != nullptr) {
      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
    } else {
      st->print("KMOV %s, [ESP + %d]", Matcher::regName[dst_first], offset);
#endif
    }
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    int offset = ra_->reg2offset(dst_first);
    if (masm != nullptr) {
      __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else {
      st->print("KMOV [ESP + %d], %s", offset, Matcher::regName[src_first]);
#endif
    }
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    if (masm != nullptr) {
      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else {
      st->print("KMOV %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
#endif
    }
    return 0;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(masm,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( nullptr, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  implementation( masm, ra_, false, nullptr );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// LEA reg,[ESP+offset] — materialize the address of the lock's stack slot.
void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(masm, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(masm, 0x2, reg, 0x04);
    emit_rm(masm, 0x0, 0x04, ESP_enc);
    emit_d32(masm, offset);
  }
  else {
    emit_opcode(masm, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(masm, 0x1, reg, 0x04);
    emit_rm(masm, 0x0, 0x04, ESP_enc);
    emit_d8(masm, offset);
  }
}

// Keep in sync with emit() above: 7 bytes with a 32-bit displacement,
// 4 bytes with the 8-bit form.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  __ ic_check(CodeEntryAlignment);
}

uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}


//=============================================================================

// Vector calling convention not supported.
bool Matcher::supports_vector_calling_convention() {
  return false;
}

OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  Unimplemented();
  return OptoRegPair(0, 0);
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
// this method should return false for offset 0.
1405 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1406 // The passed offset is relative to address of the branch. 1407 // On 86 a branch displacement is calculated relative to address 1408 // of a next instruction. 1409 offset -= br_size; 1410 1411 // the short version of jmpConUCF2 contains multiple branches, 1412 // making the reach slightly less 1413 if (rule == jmpConUCF2_rule) 1414 return (-126 <= offset && offset <= 125); 1415 return (-128 <= offset && offset <= 127); 1416 } 1417 1418 // Return whether or not this register is ever used as an argument. This 1419 // function is used on startup to build the trampoline stubs in generateOptoStub. 1420 // Registers not mentioned will be killed by the VM call in the trampoline, and 1421 // arguments in those registers not be available to the callee. 1422 bool Matcher::can_be_java_arg( int reg ) { 1423 if( reg == ECX_num || reg == EDX_num ) return true; 1424 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; 1425 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; 1426 return false; 1427 } 1428 1429 bool Matcher::is_spillable_arg( int reg ) { 1430 return can_be_java_arg(reg); 1431 } 1432 1433 uint Matcher::int_pressure_limit() 1434 { 1435 return (INTPRESSURE == -1) ? 6 : INTPRESSURE; 1436 } 1437 1438 uint Matcher::float_pressure_limit() 1439 { 1440 return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE; 1441 } 1442 1443 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { 1444 // Use hardware integer DIV instruction when 1445 // it is faster than a code which use multiply. 1446 // Only when constant divisor fits into 32 bit 1447 // (min_jint is excluded to get only correct 1448 // positive 32 bit values from negative). 
1449 return VM_Version::has_fast_idiv() && 1450 (divisor == (int)divisor && divisor != min_jint); 1451 } 1452 1453 // Register for DIVI projection of divmodI 1454 RegMask Matcher::divI_proj_mask() { 1455 return EAX_REG_mask(); 1456 } 1457 1458 // Register for MODI projection of divmodI 1459 RegMask Matcher::modI_proj_mask() { 1460 return EDX_REG_mask(); 1461 } 1462 1463 // Register for DIVL projection of divmodL 1464 RegMask Matcher::divL_proj_mask() { 1465 ShouldNotReachHere(); 1466 return RegMask(); 1467 } 1468 1469 // Register for MODL projection of divmodL 1470 RegMask Matcher::modL_proj_mask() { 1471 ShouldNotReachHere(); 1472 return RegMask(); 1473 } 1474 1475 const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1476 return NO_REG_mask(); 1477 } 1478 1479 // Returns true if the high 32 bits of the value is known to be zero. 1480 bool is_operand_hi32_zero(Node* n) { 1481 int opc = n->Opcode(); 1482 if (opc == Op_AndL) { 1483 Node* o2 = n->in(2); 1484 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1485 return true; 1486 } 1487 } 1488 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1489 return true; 1490 } 1491 return false; 1492 } 1493 1494 %} 1495 1496 //----------ENCODING BLOCK----------------------------------------------------- 1497 // This block specifies the encoding classes used by the compiler to output 1498 // byte streams. Encoding classes generate functions which are called by 1499 // Machine Instruction Nodes in order to generate the bit encoding of the 1500 // instruction. Operands specify their base encoding interface with the 1501 // interface keyword. There are currently supported four interfaces, 1502 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an 1503 // operand to generate a function which returns its register number when 1504 // queried. CONST_INTER causes an operand to generate a function which 1505 // returns the value of the constant when queried. 
MEMORY_INTER causes an 1506 // operand to generate four functions which return the Base Register, the 1507 // Index Register, the Scale Value, and the Offset Value of the operand when 1508 // queried. COND_INTER causes an operand to generate six functions which 1509 // return the encoding code (ie - encoding bits for the instruction) 1510 // associated with each basic boolean condition for a conditional instruction. 1511 // Instructions specify two basic values for encoding. They use the 1512 // ins_encode keyword to specify their encoding class (which must be one of 1513 // the class names specified in the encoding block), and they use the 1514 // opcode keyword to specify, in order, their primary, secondary, and 1515 // tertiary opcode. Only the opcode sections which a particular instruction 1516 // needs for encoding need to be specified. 1517 encode %{ 1518 // Build emit functions for each basic byte or larger field in the intel 1519 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1520 // code in the enc_class source block. Emit functions will live in the 1521 // main source block for now. In future, we can generalize this by 1522 // adding a syntax that specifies the sizes of fields in an order, 1523 // so that the adlc can build the emit functions automagically 1524 1525 // Set instruction mark in MacroAssembler. This is used only in 1526 // instructions that emit bytes directly to the CodeBuffer wraped 1527 // in the MacroAssembler. Should go away once all "instruct" are 1528 // patched to emit bytes only using methods in MacroAssembler. 
  // Mark in the MacroAssembler the start of the instruction currently being
  // emitted, for use by relocation info. Needed only by "instruct" bodies that
  // emit bytes directly into the CodeBuffer; should go away once all of them
  // use MacroAssembler methods instead.
  enc_class SetInstMark %{
    __ set_inst_mark();
  %}

  // Companion to SetInstMark: drop the recorded instruction-start mark.
  enc_class ClearInstMark %{
    __ clear_inst_mark();
  %}

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(masm, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(masm, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(masm, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix (16-bit operand size).
  enc_class SizePrefix %{
    emit_opcode(masm,0x66);
  %}

  // Emit a mod=11 (register-register) ModR/M byte for dst,src.
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  // Emit an ADLC-supplied opcode byte followed by a reg-reg ModR/M byte.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(masm,$opcode$$constant);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32, 0 — load immediate zero into dst.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( masm, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32   ( masm, 0x0  );             // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                          special case
    //
    // input : rax,: dividend                       min_int
    //         reg:  divisor                        -1
    //
    // output: rax,: quotient  (= rax, idiv reg)    min_int
    //         rdx:  remainder (= rax, irem reg)    0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(masm,0x81); emit_d8(masm,0xF8);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x80);  // cmp rax,80000000h
    emit_opcode(masm,0x0F); emit_d8(masm,0x85);
    emit_opcode(masm,0x0B); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);  // jne normal_case
    emit_opcode(masm,0x33); emit_d8(masm,0xD2);  // xor rdx,edx
    emit_opcode(masm,0x83); emit_d8(masm,0xF9); emit_d8(masm,0xFF); // cmp rcx,0FFh
    emit_opcode(masm,0x0F); emit_d8(masm,0x84);
    emit_opcode(masm,0x03); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);  // je done
    // normal_case:
    emit_opcode(masm,0x99);                      // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(masm, $opcode$$constant + $reg$$reg);
  %}

  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(masm, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(masm, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(masm, $primary | 0x02); }
    else {                          // If 32-bit immediate
      emit_opcode(masm, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(masm, 0x3, $secondary, $dst$$reg);
  %}

  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Low 32 bits of a long op-with-immediate: opcode + ModR/M + 8- or 32-bit imm.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(masm, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
    else                               emit_d32(masm,con);
  %}

  // High 32 bits of a long op-with-immediate; uses the tertiary opcode field.
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(masm, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
    else                               emit_d32(masm,con);
  %}

  enc_class OpcSReg (rRegI dst) %{    // BSWAP
    emit_cc(masm, $secondary, $dst$$reg );
  %}

  // Byte-swap a 64-bit value held in a register pair: bswap each half,
  // then exchange the halves.
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW_ENC(destlo);
    // bswap lo
    emit_opcode(masm, 0x0F);
    emit_cc(masm, 0xC8, destlo);
    // bswap hi
    emit_opcode(masm, 0x0F);
    emit_cc(masm, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(masm, 0x87);
    emit_rm(masm, 0x3, destlo, desthi);
  %}

  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(masm, 0x3, $secondary, $div$$reg );
  %}

  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(masm, $secondary, $cop$$cmpcode);
  %}

  // FCMOV for x87 double registers; opcode byte encodes condition and stack reg.
  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(masm, op >> 8 );
    emit_d8(masm, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( masm, 0x70, ($cop$$cmpcode^1) );
    emit_d8( masm, $brOffs$$constant );
  %}

  // Slow-path subtype check; register roles are fixed by the calling instruct.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     nullptr, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  // Empty (or verify-empty) the x87 stack before a call; the emitted size is
  // recorded on first use and asserted identical on later uses.
  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    int start = __ offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        __ verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty. Do cleanup now.
      __ empty_FPU_stack();
    }
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = __ offset() - start;
    } else {
      assert(__ offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      __ verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  // Direct call into the runtime; in SSE2+ mode, move (or discard) an x87
  // float/double return value into xmm0 via a stack temp.
  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    __ set_inst_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();

    if (UseSSE >= 2) {
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}

  // Pre-call cleanup: restore standard FPU control word if this method runs in
  // 24-bit precision mode, and vzeroupper to avoid AVX<->SSE penalties.
  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = __ offset());
    if (ra_->C->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
    debug_only(int off1 = __ offset());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}

  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    __ set_inst_mark();
    $$$emit8$primary;

    if (!_method) {
      emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_IMM32);
      __ clear_inst_mark();
      __ post_call_nop();
    } else {
      int method_index = resolved_method_index(masm);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                     rspec, RELOC_DISP32);
      __ post_call_nop();
      address mark = __ inst_mark();
      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
        // Calls of the same statically bound method can share
        // a stub to the interpreter.
        __ code()->shared_stub_to_interp_for(_method, __ code()->insts()->mark_off());
        __ clear_inst_mark();
      } else {
        // Emit stubs for static call.
        address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
        __ clear_inst_mark();
        if (stub == nullptr) {
          ciEnv::current()->record_failure("CodeCache is full");
          return;
        }
      }
    }
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
    __ set_inst_mark();
    $$$emit8$primary;
    emit_rm(masm, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(masm, disp);             // Displacement
    __ clear_inst_mark();
    __ post_call_nop();
  %}

  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(masm, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(masm, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(masm, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load low 32 bits of a long immediate; zero constant becomes XOR dst,dst.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(masm, 0x33);
      emit_rm(masm, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(masm, $primary + dst_enc);
      emit_d32(masm, src_con);
    }
  %}

  // Load high 32 bits of a long immediate into the paired high register.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(masm, 0x33);
      emit_rm(masm, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(masm, $primary + dst_enc);
      emit_d32(masm, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( masm, $dst$$reg, $src$$reg );
  %}

  // Copy the low word of a long register pair into an int register.
  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( masm, $dst$$reg, $src$$reg );
  %}

  // NOTE(review): duplicate of the RegReg enc_class defined earlier in this
  // encode block — presumably kept for historical reasons; confirm before removing.
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  // Low-word reg-reg form: primary opcode then ModR/M on the low registers.
  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  // High-word reg-reg form: secondary opcode then ModR/M on the high registers.
  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
  %}

  // Low-word ModR/M only (opcode emitted elsewhere).
  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  // High-word ModR/M only (opcode emitted elsewhere).
  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
  %}

  // ModR/M pairing an int register with the high word of a long pair.
  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(masm, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(masm, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  // Emit the immediate as a raw 32-bit value.
  enc_class Con_d32(immI src) %{
    emit_d32(masm,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm( masm, 0x00, $t1$$reg, 0x05 );
    emit_d32(masm, 0x00);
  %}

  enc_class lock_prefix( ) %{
    emit_opcode(masm,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(masm,0x87);
    emit_opcode(masm,0xD9);
    // [Lock]
    emit_opcode(masm,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xC7);
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(masm,0x87);
    emit_opcode(masm,0xD9);
  %}

  // Locked 32-bit compare-and-exchange at [mem_ptr].
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(masm,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xB1);
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
  %}

  // Locked 8-bit compare-and-exchange at [mem_ptr].
  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(masm,0xF0);

    // CMPXCHGB [Eptr]
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xB0);
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
  %}

  // Locked 16-bit compare-and-exchange at [mem_ptr] (0x66 prefix selects 16-bit).
  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(masm,0xF0);

    // 16-bit mode
    emit_opcode(masm, 0x66);

    // CMPXCHGW [Eptr]
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xB1);
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize ZF!=0 into res as 0/1 using a short branch over MOV res,1.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( masm, 0xB8 + res_encoding);
    emit_d32( masm, 0 );
    // JNE,s  fail
    emit_opcode(masm,0x75);
    emit_d8(masm, 5 );
    // MOV  res,1
    emit_opcode( masm, 0xB8 + res_encoding);
    emit_d32( masm, 1 );
    // fail:
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(masm, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Double-word shift by a 1..31 count: SHLD/SHRD (tertiary opcode) on the
  // pair, then the plain shift (primary/secondary) on the other half.
  // Operand order depends on whether tertiary is SHLD (0xA4).
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
    emit_opcode(masm,0x0F);
    emit_opcode(masm,$tertiary);
    emit_rm(masm, 0x3, r1, r2);
    emit_d8(masm,$cnt$$constant);
    emit_d8(masm,$primary);
    emit_rm(masm, 0x3, $secondary, r1);
    emit_d8(masm,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by 32..63: move hi to lo, shift lo by
  // cnt-32, then sign-fill hi with SAR 31.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( masm, 0x8B ); // Move
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(masm,$primary);
      emit_rm(masm, 0x3, $secondary, $dst$$reg);
      emit_d8(masm,$cnt$$constant-32);
    }
    emit_d8(masm,$primary);
    emit_rm(masm, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_d8(masm,31);
  %}

  // Logical long shift by 32..63: move one half across, shift it by cnt-32,
  // then clear the vacated half with XOR.  Direction chosen via secondary opcode.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }

    emit_opcode( masm, 0x8B ); // Move r1,r2
    emit_rm(masm, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(masm,$primary);
      emit_rm(masm, 0x3, $secondary, r1);
      emit_d8(masm,$cnt$$constant-32);
    }
    emit_opcode(masm,0x33); // XOR r2,r2
    emit_rm(masm, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(masm,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  // Same as RMopc_Mem_no_oop but the displacement may carry relocation info.
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(masm,0x3B);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(masm,0x7C);
    emit_d8(masm,2);
    // move dst,src
    emit_opcode(masm,0x8B);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(masm,0x3B);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(masm,0x7F);
    emit_d8(masm,2);
    // move dst,src
    emit_opcode(masm,0x8B);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( masm, 0xD9 ); // FLD (i.e., push it)
      emit_d8( masm, 0xC0-1+$src$$reg );
    }
    __ set_inst_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(masm,$primary);
    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
    __ clear_inst_mark();
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(masm,0xF7);
    emit_rm(masm, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0x9C);
    emit_rm( masm, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free conditional add: p -= q; tmp = (borrow ? -1 : 0); p += (tmp & y).
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(masm,0x2B);
    emit_rm(masm, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(masm,0x1B);
    emit_rm(masm, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(masm,0x23);
    emit_rm(masm, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(masm,0x03);
    emit_rm(masm, 0x3, $p$$reg, tmpReg);
  %}

  // 64-bit left shift by a variable count in ECX; handles counts >= 32 by
  // moving lo into hi and clearing lo before the SHLD/SHL pair.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(masm,0xF7);
    emit_rm(masm, 0x3, 0, ECX_enc);
    emit_d32(masm,0x20);
    // JEQ,s small
    emit_opcode(masm, 0x74);
    emit_d8(masm, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(masm, 0x33);
    emit_rm(masm, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xA5);
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    // SHL $dst.lo,$shift"
    emit_opcode(masm,0xD3);
    emit_rm(masm, 0x3, 0x4, $dst$$reg );
  %}

  // 64-bit logical right shift by a variable count in ECX; for counts >= 32
  // move hi into lo and zero hi before the SHRD/SHR pair.
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(masm,0xF7);
    emit_rm(masm, 0x3, 0, ECX_enc);
    emit_d32(masm,0x20);
    // JEQ,s small
    emit_opcode(masm, 0x74);
    emit_d8(masm, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(masm, 0x33);
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xAD);
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(masm,0xD3);
    emit_rm(masm, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
  %}

  // 64-bit arithmetic right shift by a variable count in ECX; for counts >= 32
  // move hi into lo and sign-fill hi with SAR 31 before the SHRD/SAR pair.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(masm,0xF7);
    emit_rm(masm, 0x3, 0, ECX_enc);
    emit_d32(masm,0x20);
    // JEQ,s small
    emit_opcode(masm, 0x74);
    emit_d8(masm, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(masm, 0xC1);
    emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
    emit_d8(masm, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xAD);
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(masm,0xD3);
    emit_rm(masm, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(masm, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( masm, 0xDD );
    emit_d8( masm, 0xD8 );
  %}

  // !!!!!  equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( masm, 0xDD );           // FSTP ST(i)
    emit_d8( masm, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( masm, 0xD9 );
    emit_d8( masm, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // strictfp: multiply TOS by a bias constant loaded as an 80-bit real.
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( masm, 0xDB );           // FLD m80real
    emit_opcode( masm, 0x2D );
    emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
    emit_opcode( masm, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( masm, 0xC8+$dst$$reg );
  %}

  // strictfp: companion to strictfp_bias1 using the second bias constant.
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( masm, 0xDB );           // FLD m80real
    emit_opcode( masm, 0x2D );
    emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
    emit_opcode( masm, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( masm, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( masm, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( masm, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( masm, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(masm, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( masm, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( masm, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( masm, 0xDD, 0x03, $dst$$disp );
  %}

  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( masm, 0xDD );           // FSTP ST(i)
    emit_d8( masm, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( masm, 0xD9 );           // FLD ST(i-1)
    emit_d8( masm, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( masm, 0xD9 );         // FLD ST(i-1)
      emit_d8( masm, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( masm, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( masm, 0xD9 );         // FLD ST(i-1)
      emit_d8( masm, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( masm, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( masm, 0xD9 );         // FLD ST(src-1)
      emit_d8( masm, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( masm, 0xDD );
    emit_d8( masm, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  // Load dst into FPR0, rotating src up to FPR1 (via fincstp/FXCH/fdecstp)
  // so a subsequent two-operand x87 op sees both where it expects them.
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( masm, 0xD9 );
    emit_d8( masm, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (masm, 0xD9);
      emit_opcode (masm, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(masm, 0xD9);
      emit_d8(masm, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (masm, 0xD9);
      emit_opcode (masm, 0xF6);
    }
  %}

  // Move two XMM doubles onto the x87 stack via an 8-byte stack temp
  // (src1 first, so src0 ends up on top).
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Float variant of Push_ModD_encoding using a 4-byte stack temp.
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the x87 TOS double into dst (XMM) through the stack temp, freeing it.
  enc_class Push_ResultD(regD dst) %{
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Pop the x87 TOS float into dst (XMM); d8 is the stack-temp size to release.
  enc_class Push_ResultF(regF dst, immI d8) %{
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  // Move one XMM double onto the x87 stack via an 8-byte stack temp.
  enc_class Push_SrcD(regD src) %{
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Reserve an 8-byte stack temp (pairs with pop_stack_temp_qword).
  enc_class push_stack_temp_qword() %{
    __ subptr(rsp, 8);
  %}
// Discard the 64-bit scratch slot that a previous encoding pushed on the C stack.
enc_class pop_stack_temp_qword() %{
  __ addptr(rsp, 8);
%}

// Copy an XMM double to the x87 stack top by bouncing it through [ESP].
enc_class push_xmm_to_fpr1(regD src) %{
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Rotate $src into FPR1 without disturbing the rest of the x87 stack:
// fincstp / fxch / fdecstp.  No-op when $src already is FPR1.
enc_class Push_Result_Mod_DPR( regDPR src) %{
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (masm, 0xD9);
    emit_opcode (masm, 0xF7);
    // FXCH FPR1 with src
    emit_opcode(masm, 0xD9);
    emit_d8(masm, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (masm, 0xD9);
    emit_opcode (masm, 0xF6);
  }
%}

// Copy x87 status to EFLAGS, then skip the following 5 bytes when the
// parity flag (unordered-compare indicator) is clear.
enc_class fnstsw_sahf_skip_parity() %{
  // fnstsw ax
  emit_opcode( masm, 0xDF );
  emit_opcode( masm, 0xE0 );
  // sahf
  emit_opcode( masm, 0x9E );
  // jnp ::skip
  emit_opcode( masm, 0x7B );
  emit_opcode( masm, 0x05 );
%}

// FPREM loop: fprem only reduces partially, so repeat while the C2
// status bit (surfaced as parity after sahf) says "incomplete".
enc_class emitModDPR() %{
  // fprem must be iterative
  // :: loop
  // fprem
  emit_opcode( masm, 0xD9 );
  emit_opcode( masm, 0xF8 );
  // wait
  emit_opcode( masm, 0x9b );
  // fnstsw ax
  emit_opcode( masm, 0xDF );
  emit_opcode( masm, 0xE0 );
  // sahf
  emit_opcode( masm, 0x9E );
  // jp ::loop  (rel32 = -12, back to the fprem)
  emit_opcode( masm, 0x0F );
  emit_opcode( masm, 0x8A );
  emit_opcode( masm, 0xF4 );
  emit_opcode( masm, 0xFF );
  emit_opcode( masm, 0xFF );
  emit_opcode( masm, 0xFF );
%}

// Materialize x87 compare result in EFLAGS, forcing the LT outcome
// (carry set) for unordered (NaN) comparisons.
enc_class fpu_flags() %{
  // fnstsw_ax
  emit_opcode( masm, 0xDF);
  emit_opcode( masm, 0xE0);
  // test ax,0x0400
  emit_opcode( masm, 0x66 );   // operand-size prefix for 16-bit immediate
  emit_opcode( masm, 0xA9 );
  emit_d16   ( masm, 0x0400 );
  // // // This sequence works, but stalls for 12-16 cycles on PPro
  // // test rax,0x0400
  // emit_opcode( masm, 0xA9 );
  // emit_d32  ( masm, 0x00000400 );
  //
  // jz exit (no unordered comparison)
  emit_opcode( masm, 0x74 );
  emit_d8    ( masm, 0x02 );
  // mov ah,1 - treat as LT case (set carry flag)
  emit_opcode( masm, 0xB4 );
  emit_d8    ( masm, 0x01 );
  // sahf
  emit_opcode( masm, 0x9E);
%}

enc_class cmpF_P6_fixup() %{
  // Fixup the integer flags in case comparison involved a NaN
  //
  // JNP exit (no unordered comparison, P-flag is set by NaN)
  emit_opcode( masm, 0x7B );
  emit_d8    ( masm, 0x03 );
  // MOV AH,1 - treat as LT case (set carry flag)
  emit_opcode( masm, 0xB4 );
  emit_d8    ( masm, 0x01 );
  // SAHF
  emit_opcode( masm, 0x9E);
  // NOP      // target for branch to avoid branch to branch
  emit_opcode( masm, 0x90);
%}

// Pseudo-code for the CmpF_Result byte sequence below:
// fnstsw_ax();
// sahf();
// movl(dst, nan_result);
// jcc(Assembler::parity, exit);
// movl(dst, less_result);
// jcc(Assembler::below, exit);
// movl(dst, equal_result);
// jcc(Assembler::equal, exit);
// movl(dst, greater_result);

// less_result    =  1;
// greater_result = -1;
// equal_result   =  0;
// nan_result     = -1;

// Three-way float compare result in $dst: -1 / 0 / 1, NaN maps to -1.
// The jcc displacements (0x13, 0x0C, 0x05) each skip the remaining
// mov/jcc pairs; keep them in sync if the sequence changes.
enc_class CmpF_Result(rRegI dst) %{
  // fnstsw_ax();
  emit_opcode( masm, 0xDF);
  emit_opcode( masm, 0xE0);
  // sahf
  emit_opcode( masm, 0x9E);
  // movl(dst, nan_result);
  emit_opcode( masm, 0xB8 + $dst$$reg);
  emit_d32( masm, -1 );
  // jcc(Assembler::parity, exit);
  emit_opcode( masm, 0x7A );
  emit_d8    ( masm, 0x13 );
  // movl(dst, less_result);
  emit_opcode( masm, 0xB8 + $dst$$reg);
  emit_d32( masm, -1 );
  // jcc(Assembler::below, exit);
  emit_opcode( masm, 0x72 );
  emit_d8    ( masm, 0x0C );
  // movl(dst, equal_result);
  emit_opcode( masm, 0xB8 + $dst$$reg);
  emit_d32( masm, 0 );
  // jcc(Assembler::equal, exit);
  emit_opcode( masm, 0x74 );
  emit_d8    ( masm, 0x05 );
  // movl(dst, greater_result);
  emit_opcode( masm, 0xB8 + $dst$$reg);
  emit_d32( masm, 1 );
%}


// Compare the longs and set flags
// BROKEN!
// Do Not use as-is
enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
  // CMP    $src1.hi,$src2.hi
  emit_opcode( masm, 0x3B );
  emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
  // JNE,s  done
  emit_opcode(masm,0x75);
  emit_d8(masm, 2 );
  // CMP    $src1.lo,$src2.lo
  emit_opcode( masm, 0x3B );
  emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
  // done:
%}

// Sign-extend a 32-bit int into a long register pair (lo = src, hi = src >> 31).
enc_class convert_int_long( regL dst, rRegI src ) %{
  // mov $dst.lo,$src
  int dst_encoding = $dst$$reg;
  int src_encoding = $src$$reg;
  encode_Copy( masm, dst_encoding , src_encoding );
  // mov $dst.hi,$src
  encode_Copy( masm, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
  // sar $dst.hi,31
  emit_opcode( masm, 0xC1 );
  emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
  emit_d8(masm, 0x1F );
%}

// Push the long pair, FILD it onto the x87 stack, then pop the two slots.
enc_class convert_long_double( eRegL src ) %{
  // push $src.hi
  emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
  // push $src.lo
  emit_opcode(masm, 0x50+$src$$reg  );
  // fild 64-bits at [SP]
  emit_opcode(masm,0xdf);
  emit_d8(masm, 0x6C);
  emit_d8(masm, 0x24);
  emit_d8(masm, 0x00);
  // pop stack
  emit_opcode(masm, 0x83); // add SP, #8
  emit_rm(masm, 0x3, 0x00, ESP_enc);
  emit_d8(masm, 0x8);
%}

// mulhi for shift counts 32..63: widen-multiply into EDX:EAX, then
// arithmetic-shift EDX right by (cnt - 32); no shift needed when cnt == 32.
enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
  // IMUL   EDX:EAX,$src1
  emit_opcode( masm, 0xF7 );
  emit_rm( masm, 0x3, 0x5, $src1$$reg );
  // SAR    EDX,$cnt-32
  int shift_count = ((int)$cnt$$constant) - 32;
  if (shift_count > 0) {
    emit_opcode(masm, 0xC1);
    emit_rm(masm, 0x3, 7, $dst$$reg );
    emit_d8(masm, shift_count);
  }
%}

// this version doesn't have add sp, 8
enc_class convert_long_double2( eRegL src ) %{
  // push $src.hi
  emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
  // push $src.lo
  emit_opcode(masm, 0x50+$src$$reg  );
  // fild 64-bits at [SP]
  emit_opcode(masm,0xdf);
  emit_d8(masm, 0x6C);
  emit_d8(masm, 0x24);
  emit_d8(masm, 0x00);
%}

enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
  // Basic idea: long = (long)int * (long)int
  // IMUL EDX:EAX, src
  emit_opcode( masm, 0xF7 );
  emit_rm( masm, 0x3, 0x5, $src$$reg);
%}

enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
  // MUL EDX:EAX, src
  emit_opcode( masm, 0xF7 );
  emit_rm( masm, 0x3, 0x4, $src$$reg);
%}

enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  // MOV    $tmp,$src.lo
  encode_Copy( masm, $tmp$$reg, $src$$reg );
  // IMUL   $tmp,EDX
  emit_opcode( masm, 0x0F );
  emit_opcode( masm, 0xAF );
  emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
  // MOV    EDX,$src.hi
  encode_Copy( masm, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
  // IMUL   EDX,EAX
  emit_opcode( masm, 0x0F );
  emit_opcode( masm, 0xAF );
  emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
  // ADD    $tmp,EDX
  emit_opcode( masm, 0x03 );
  emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
  // MUL    EDX:EAX,$src.lo
  emit_opcode( masm, 0xF7 );
  emit_rm( masm, 0x3, 0x4, $src$$reg );
  // ADD    EDX,ESI
  emit_opcode( masm, 0x03 );
  emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
%}

enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
  // Basic idea: lo(result) = lo(src * y_lo)
  //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
  // IMUL   $tmp,EDX,$src
  emit_opcode( masm, 0x6B );
  emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
  emit_d8( masm, (int)$src$$constant );
  // MOV    EDX,$src
  emit_opcode(masm, 0xB8 + EDX_enc);
  emit_d32( masm, (int)$src$$constant );
  // MUL    EDX:EAX,EDX
  emit_opcode( masm, 0xF7 );
  emit_rm( masm, 0x3, 0x4, EDX_enc );
  // ADD    EDX,ESI
  emit_opcode( masm, 0x03 );
  emit_rm( masm, 0x3, EDX_enc, $tmp$$reg );
%}

// Long divide: push both 64-bit operands and call SharedRuntime::ldiv,
// then pop the four argument words.  (HIGH_FROM_LOW_ENC is additive, so
// applying it to 0x50+reg is equivalent to 0x50+HIGH_FROM_LOW_ENC(reg).)
enc_class long_div( eRegL src1, eRegL src2 ) %{
  // PUSH src1.hi
  emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
  // PUSH src1.lo
  emit_opcode(masm, 0x50+$src1$$reg );
  // PUSH src2.hi
  emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
  // PUSH src2.lo
  emit_opcode(masm, 0x50+$src2$$reg );
  // CALL directly to the runtime
  __ set_inst_mark();
  emit_opcode(masm,0xE8);       // Call into runtime
  emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  __ clear_inst_mark();
  __ post_call_nop();
  // Restore stack
  emit_opcode(masm, 0x83); // add SP, #framesize
  emit_rm(masm, 0x3, 0x00, ESP_enc);
  emit_d8(masm, 4*4);
%}

// Long remainder: same calling sequence as long_div, but targets
// SharedRuntime::lrem.
enc_class long_mod( eRegL src1, eRegL src2 ) %{
  // PUSH src1.hi
  emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
  // PUSH src1.lo
  emit_opcode(masm, 0x50+$src1$$reg );
  // PUSH src2.hi
  emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
  // PUSH src2.lo
  emit_opcode(masm, 0x50+$src2$$reg );
  // CALL directly to the runtime
  __ set_inst_mark();
  emit_opcode(masm,0xE8);       // Call into runtime
  emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  __ clear_inst_mark();
  __ post_call_nop();
  // Restore stack
  emit_opcode(masm, 0x83); // add SP, #framesize
  emit_rm(masm, 0x3, 0x00, ESP_enc);
  emit_d8(masm, 4*4);
%}

// Zero-test a long: OR the halves into $tmp so EFLAGS reflects (src == 0).
enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
  // MOV   $tmp,$src.lo
  emit_opcode(masm, 0x8B);
  emit_rm(masm, 0x3, $tmp$$reg, $src$$reg);
  // OR    $tmp,$src.hi
  emit_opcode(masm, 0x0B);
  emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
%}

// Long equality compare: compare lo words, and only if they are equal
// compare the hi words (the JNE,s skips the second CMP).
enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
  // CMP    $src1.lo,$src2.lo
  emit_opcode( masm, 0x3B );
  emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
  // JNE,s  skip
  emit_cc(masm, 0x70, 0x5);
  emit_d8(masm,2);
  // CMP    $src1.hi,$src2.hi
  emit_opcode( masm, 0x3B );
  emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
%}

// Signed long compare via CMP lo / SBB hi; clobbers $tmp.
enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
  // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
  emit_opcode( masm, 0x3B );
  emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
  // MOV    $tmp,$src1.hi
  emit_opcode( masm, 0x8B );
  emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
  // SBB    $tmp,$src2.hi\t! Compute flags for long compare
  emit_opcode( masm, 0x1B );
  emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
%}

// Compare a long against zero: flags of (0 - src) via CMP/SBB on a
// zeroed $tmp.
enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
  // XOR    $tmp,$tmp
  emit_opcode(masm,0x33);  // XOR
  emit_rm(masm,0x3, $tmp$$reg, $tmp$$reg);
  // CMP    $tmp,$src.lo
  emit_opcode( masm, 0x3B );
  emit_rm(masm, 0x3, $tmp$$reg, $src$$reg );
  // SBB    $tmp,$src.hi
  emit_opcode( masm, 0x1B );
  emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
%}

// Sniff, sniff...
// smells like Gnu Superoptimizer
// Two's-complement negate of a long pair: NEG hi; NEG lo; SBB hi,0
// fixes up the borrow from the low word.
enc_class neg_long( eRegL dst ) %{
  emit_opcode(masm,0xF7);    // NEG hi
  emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
  emit_opcode(masm,0xF7);    // NEG lo
  emit_rm    (masm,0x3, 0x3,  $dst$$reg );
  emit_opcode(masm,0x83);    // SBB hi,0
  emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
  emit_d8    (masm,0 );
%}

enc_class enc_pop_rdx() %{
  emit_opcode(masm,0x5A);    // POP EDX
%}

// Tail-jump to the shared rethrow stub.
enc_class enc_rethrow() %{
  __ set_inst_mark();
  emit_opcode(masm, 0xE9);   // jmp    entry
  emit_d32_reloc(masm, (int)OptoRuntime::rethrow_stub() - ((int)__ pc())-4,
                 runtime_call_Relocation::spec(), RELOC_IMM32 );
  __ clear_inst_mark();
  __ post_call_nop();
%}


// Convert a double to an int.  Java semantics require we do complex
// manglelations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware throws an exception which
// patches up the correct value directly to the stack.
enc_class DPR2I_encoding( regDPR src ) %{
  // Flip to round-to-zero mode.  We attempted to allow invalid-op
  // exceptions here, so that a NAN or other corner-case value will
  // thrown an exception (but normal values get converted at full speed).
  // However, I2C adapters and other float-stack manglers leave pending
  // invalid-op exceptions hanging.  We would have to clear them before
  // enabling them and that is more expensive than just testing for the
  // invalid value Intel stores down in the corner cases.
  emit_opcode(masm,0xD9);            // FLDCW  trunc
  emit_opcode(masm,0x2D);
  emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
  // Allocate a word
  emit_opcode(masm,0x83);            // SUB ESP,4
  emit_opcode(masm,0xEC);
  emit_d8(masm,0x04);
  // Encoding assumes a double has been pushed into FPR0.
  // Store down the double as an int, popping the FPU stack
  emit_opcode(masm,0xDB);            // FISTP [ESP]
  emit_opcode(masm,0x1C);
  emit_d8(masm,0x24);
  // Restore the rounding mode; mask the exception
  emit_opcode(masm,0xD9);            // FLDCW std/24-bit mode
  emit_opcode(masm,0x2D);
  emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

  // Load the converted int; adjust CPU stack
  emit_opcode(masm,0x58);       // POP EAX
  // 0x80000000 is the hardware's "invalid" sentinel; only then take the
  // slow path through the d2i wrapper.
  emit_opcode(masm,0x3D);       // CMP EAX,imm
  emit_d32   (masm,0x80000000); //         0x80000000
  emit_opcode(masm,0x75);       // JNE around_slow_call
  emit_d8    (masm,0x07);       // Size of slow_call
  // Push src onto stack slow-path
  emit_opcode(masm,0xD9 );      // FLD     ST(i)
  emit_d8    (masm,0xC0-1+$src$$reg );
  // CALL directly to the runtime
  __ set_inst_mark();
  emit_opcode(masm,0xE8);       // Call into runtime
  emit_d32_reloc(masm, (StubRoutines::x86::d2i_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  __ clear_inst_mark();
  __ post_call_nop();
  // Carry on here...
%}

// Double -> long, same shape as DPR2I_encoding but with a 64-bit FISTP;
// the sentinel check covers EDX == 0x80000000 with EAX == 0.
enc_class DPR2L_encoding( regDPR src ) %{
  emit_opcode(masm,0xD9);            // FLDCW  trunc
  emit_opcode(masm,0x2D);
  emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
  // Allocate a word
  emit_opcode(masm,0x83);            // SUB ESP,8
  emit_opcode(masm,0xEC);
  emit_d8(masm,0x08);
  // Encoding assumes a double has been pushed into FPR0.
  // Store down the double as a long, popping the FPU stack
  emit_opcode(masm,0xDF);            // FISTP [ESP]
  emit_opcode(masm,0x3C);
  emit_d8(masm,0x24);
  // Restore the rounding mode; mask the exception
  emit_opcode(masm,0xD9);            // FLDCW std/24-bit mode
  emit_opcode(masm,0x2D);
  emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

  // Load the converted int; adjust CPU stack
  emit_opcode(masm,0x58);       // POP EAX
  emit_opcode(masm,0x5A);       // POP EDX
  emit_opcode(masm,0x81);       // CMP EDX,imm
  emit_d8    (masm,0xFA);       // rdx
  emit_d32   (masm,0x80000000); //         0x80000000
  emit_opcode(masm,0x75);       // JNE around_slow_call
  emit_d8    (masm,0x07+4);     // Size of slow_call
  emit_opcode(masm,0x85);       // TEST EAX,EAX
  emit_opcode(masm,0xC0);       // 2/rax,/rax,
  emit_opcode(masm,0x75);       // JNE around_slow_call
  emit_d8    (masm,0x07);       // Size of slow_call
  // Push src onto stack slow-path
  emit_opcode(masm,0xD9 );      // FLD     ST(i)
  emit_d8    (masm,0xC0-1+$src$$reg );
  // CALL directly to the runtime
  __ set_inst_mark();
  emit_opcode(masm,0xE8);       // Call into runtime
  emit_d32_reloc(masm, (StubRoutines::x86::d2l_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  __ clear_inst_mark();
  __ post_call_nop();
  // Carry on here...
%}

enc_class FMul_ST_reg( eRegFPR src1 ) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FMUL   ST,$src  /* D8 C8+i */
  emit_opcode(masm, 0xD8);
  emit_opcode(masm, 0xC8 + $src1$$reg);
%}

enc_class FAdd_ST_reg( eRegFPR src2 ) %{
  // FADDP  ST,src2  /* D8 C0+i */
  emit_opcode(masm, 0xD8);
  emit_opcode(masm, 0xC0 + $src2$$reg);
  //could use FADDP  src2,fpST  /* DE C0+i */
%}

enc_class FAddP_reg_ST( eRegFPR src2 ) %{
  // FADDP  src2,ST  /* DE C0+i */
  emit_opcode(masm, 0xDE);
  emit_opcode(masm, 0xC0 + $src2$$reg);
%}

enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
  // Operand has been loaded into fp ST (stack top)
  // FSUB   ST,$src1
  emit_opcode(masm, 0xD8);
  emit_opcode(masm, 0xE0 + $src1$$reg);

  // FDIV
  emit_opcode(masm, 0xD8);
  emit_opcode(masm, 0xF0 + $src2$$reg);
%}

enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FADD   ST,$src  /* D8 C0+i */
  emit_opcode(masm, 0xD8);
  emit_opcode(masm, 0xC0 + $src1$$reg);

  // FMUL   ST,src2  /* D8 C*+i */
  emit_opcode(masm, 0xD8);
  emit_opcode(masm, 0xC8 + $src2$$reg);
%}


enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FADD   ST,$src  /* D8 C0+i */
  emit_opcode(masm, 0xD8);
  emit_opcode(masm, 0xC0 + $src1$$reg);

  // FMULP  src2,ST  /* DE C8+i */
  emit_opcode(masm, 0xDE);
  emit_opcode(masm, 0xC8 + $src2$$reg);
%}

// Atomically load the volatile long
enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
  emit_opcode(masm,0xDF);            // FILD64 [mem] loads atomically
  int rm_byte_opcode = 0x05;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc =
$mem->disp_reloc(); // disp-as-oop when working with static globals 3018 encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3019 store_to_stackslot( masm, 0x0DF, 0x07, $dst$$disp ); 3020 %} 3021 3022 // Volatile Store Long. Must be atomic, so move it into 3023 // the FP TOS and then do a 64-bit FIST. Has to probe the 3024 // target address before the store (for null-ptr checks) 3025 // so the memory operand is used twice in the encoding. 3026 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3027 store_to_stackslot( masm, 0x0DF, 0x05, $src$$disp ); 3028 __ set_inst_mark(); // Mark start of FIST in case $mem has an oop 3029 emit_opcode(masm,0xDF); 3030 int rm_byte_opcode = 0x07; 3031 int base = $mem$$base; 3032 int index = $mem$$index; 3033 int scale = $mem$$scale; 3034 int displace = $mem$$disp; 3035 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3036 encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3037 __ clear_inst_mark(); 3038 %} 3039 3040 %} 3041 3042 3043 //----------FRAME-------------------------------------------------------------- 3044 // Definition of frame structure and management information. 3045 // 3046 // S T A C K L A Y O U T Allocators stack-slot number 3047 // | (to get allocators register number 3048 // G Owned by | | v add OptoReg::stack0()) 3049 // r CALLER | | 3050 // o | +--------+ pad to even-align allocators stack-slot 3051 // w V | pad0 | numbers; owned by CALLER 3052 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned 3053 // h ^ | in | 5 3054 // | | args | 4 Holes in incoming args owned by SELF 3055 // | | | | 3 3056 // | | +--------+ 3057 // V | | old out| Empty on Intel, window on Sparc 3058 // | old |preserve| Must be even aligned. 3059 // | SP-+--------+----> Matcher::_old_SP, even aligned 3060 // | | in | 3 area for Intel ret address 3061 // Owned by |preserve| Empty on Sparc. 
//      SELF     +--------+
//        |      |  pad2  |  2   pad to align old SP
//        |      +--------+  1
//        |      | locks  |  0
//        |      +--------+----> OptoReg::stack0(), even aligned
//        |      |  pad1  | 11   pad to align new SP
//        |      +--------+
//        |      |        | 10
//        |      | spills |  9   spills
//        V      |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^      |  out   |  7
//        |      |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by    +--------+
//    CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |  new |preserve|      Must be even-aligned.
//        |   SP-+--------+----> Matcher::_new_SP, even aligned
//        |      |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);               // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3128 // Otherwise, it is above the locks and verification slot and alignment word 3129 return_addr(STACK - 1 + 3130 align_up((Compile::current()->in_preserve_stack_slots() + 3131 Compile::current()->fixed_slots()), 3132 stack_alignment_in_slots())); 3133 3134 // Location of C & interpreter return values 3135 c_return_value %{ 3136 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3137 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3138 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3139 3140 // in SSE2+ mode we want to keep the FPU stack clean so pretend 3141 // that C functions return float and double results in XMM0. 3142 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3143 return OptoRegPair(XMM0b_num,XMM0_num); 3144 if( ideal_reg == Op_RegF && UseSSE>=2 ) 3145 return OptoRegPair(OptoReg::Bad,XMM0_num); 3146 3147 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3148 %} 3149 3150 // Location of return values 3151 return_value %{ 3152 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3153 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3154 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3155 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3156 return OptoRegPair(XMM0b_num,XMM0_num); 3157 if( ideal_reg == Op_RegF && UseSSE>=1 ) 3158 return OptoRegPair(OptoReg::Bad,XMM0_num); 3159 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3160 %} 3161 3162 %} 3163 3164 //----------ATTRIBUTES--------------------------------------------------------- 3165 //----------Operand Attributes------------------------------------------------- 3166 op_attrib op_cost(0); // Required cost attribute 3167 3168 //----------Instruction Attributes--------------------------------------------- 3169 ins_attrib 
ins_cost(100); // Required cost attribute 3170 ins_attrib ins_size(8); // Required size attribute (in bits) 3171 ins_attrib ins_short_branch(0); // Required flag: is this instruction a 3172 // non-matching short branch variant of some 3173 // long branch? 3174 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2) 3175 // specifies the alignment that some part of the instruction (not 3176 // necessarily the start) requires. If > 1, a compute_padding() 3177 // function must be provided for the instruction 3178 3179 //----------OPERANDS----------------------------------------------------------- 3180 // Operand definitions must precede instruction definitions for correct parsing 3181 // in the ADLC because operands constitute user defined types which are used in 3182 // instruction definitions. 3183 3184 //----------Simple Operands---------------------------------------------------- 3185 // Immediate Operands 3186 // Integer Immediate 3187 operand immI() %{ 3188 match(ConI); 3189 3190 op_cost(10); 3191 format %{ %} 3192 interface(CONST_INTER); 3193 %} 3194 3195 // Constant for test vs zero 3196 operand immI_0() %{ 3197 predicate(n->get_int() == 0); 3198 match(ConI); 3199 3200 op_cost(0); 3201 format %{ %} 3202 interface(CONST_INTER); 3203 %} 3204 3205 // Constant for increment 3206 operand immI_1() %{ 3207 predicate(n->get_int() == 1); 3208 match(ConI); 3209 3210 op_cost(0); 3211 format %{ %} 3212 interface(CONST_INTER); 3213 %} 3214 3215 // Constant for decrement 3216 operand immI_M1() %{ 3217 predicate(n->get_int() == -1); 3218 match(ConI); 3219 3220 op_cost(0); 3221 format %{ %} 3222 interface(CONST_INTER); 3223 %} 3224 3225 // Valid scale values for addressing modes 3226 operand immI2() %{ 3227 predicate(0 <= n->get_int() && (n->get_int() <= 3)); 3228 match(ConI); 3229 3230 format %{ %} 3231 interface(CONST_INTER); 3232 %} 3233 3234 operand immI8() %{ 3235 predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); 3236 match(ConI); 
3237 3238 op_cost(5); 3239 format %{ %} 3240 interface(CONST_INTER); 3241 %} 3242 3243 operand immU8() %{ 3244 predicate((0 <= n->get_int()) && (n->get_int() <= 255)); 3245 match(ConI); 3246 3247 op_cost(5); 3248 format %{ %} 3249 interface(CONST_INTER); 3250 %} 3251 3252 operand immI16() %{ 3253 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); 3254 match(ConI); 3255 3256 op_cost(10); 3257 format %{ %} 3258 interface(CONST_INTER); 3259 %} 3260 3261 // Int Immediate non-negative 3262 operand immU31() 3263 %{ 3264 predicate(n->get_int() >= 0); 3265 match(ConI); 3266 3267 op_cost(0); 3268 format %{ %} 3269 interface(CONST_INTER); 3270 %} 3271 3272 // Constant for long shifts 3273 operand immI_32() %{ 3274 predicate( n->get_int() == 32 ); 3275 match(ConI); 3276 3277 op_cost(0); 3278 format %{ %} 3279 interface(CONST_INTER); 3280 %} 3281 3282 operand immI_1_31() %{ 3283 predicate( n->get_int() >= 1 && n->get_int() <= 31 ); 3284 match(ConI); 3285 3286 op_cost(0); 3287 format %{ %} 3288 interface(CONST_INTER); 3289 %} 3290 3291 operand immI_32_63() %{ 3292 predicate( n->get_int() >= 32 && n->get_int() <= 63 ); 3293 match(ConI); 3294 op_cost(0); 3295 3296 format %{ %} 3297 interface(CONST_INTER); 3298 %} 3299 3300 operand immI_2() %{ 3301 predicate( n->get_int() == 2 ); 3302 match(ConI); 3303 3304 op_cost(0); 3305 format %{ %} 3306 interface(CONST_INTER); 3307 %} 3308 3309 operand immI_3() %{ 3310 predicate( n->get_int() == 3 ); 3311 match(ConI); 3312 3313 op_cost(0); 3314 format %{ %} 3315 interface(CONST_INTER); 3316 %} 3317 3318 operand immI_4() 3319 %{ 3320 predicate(n->get_int() == 4); 3321 match(ConI); 3322 3323 op_cost(0); 3324 format %{ %} 3325 interface(CONST_INTER); 3326 %} 3327 3328 operand immI_8() 3329 %{ 3330 predicate(n->get_int() == 8); 3331 match(ConI); 3332 3333 op_cost(0); 3334 format %{ %} 3335 interface(CONST_INTER); 3336 %} 3337 3338 // Pointer Immediate 3339 operand immP() %{ 3340 match(ConP); 3341 3342 op_cost(10); 3343 format %{ %} 
3344 interface(CONST_INTER); 3345 %} 3346 3347 // Null Pointer Immediate 3348 operand immP0() %{ 3349 predicate( n->get_ptr() == 0 ); 3350 match(ConP); 3351 op_cost(0); 3352 3353 format %{ %} 3354 interface(CONST_INTER); 3355 %} 3356 3357 // Long Immediate 3358 operand immL() %{ 3359 match(ConL); 3360 3361 op_cost(20); 3362 format %{ %} 3363 interface(CONST_INTER); 3364 %} 3365 3366 // Long Immediate zero 3367 operand immL0() %{ 3368 predicate( n->get_long() == 0L ); 3369 match(ConL); 3370 op_cost(0); 3371 3372 format %{ %} 3373 interface(CONST_INTER); 3374 %} 3375 3376 // Long Immediate zero 3377 operand immL_M1() %{ 3378 predicate( n->get_long() == -1L ); 3379 match(ConL); 3380 op_cost(0); 3381 3382 format %{ %} 3383 interface(CONST_INTER); 3384 %} 3385 3386 // Long immediate from 0 to 127. 3387 // Used for a shorter form of long mul by 10. 3388 operand immL_127() %{ 3389 predicate((0 <= n->get_long()) && (n->get_long() <= 127)); 3390 match(ConL); 3391 op_cost(0); 3392 3393 format %{ %} 3394 interface(CONST_INTER); 3395 %} 3396 3397 // Long Immediate: low 32-bit mask 3398 operand immL_32bits() %{ 3399 predicate(n->get_long() == 0xFFFFFFFFL); 3400 match(ConL); 3401 op_cost(0); 3402 3403 format %{ %} 3404 interface(CONST_INTER); 3405 %} 3406 3407 // Long Immediate: low 32-bit mask 3408 operand immL32() %{ 3409 predicate(n->get_long() == (int)(n->get_long())); 3410 match(ConL); 3411 op_cost(20); 3412 3413 format %{ %} 3414 interface(CONST_INTER); 3415 %} 3416 3417 //Double Immediate zero 3418 operand immDPR0() %{ 3419 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3420 // bug that generates code such that NaNs compare equal to 0.0 3421 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); 3422 match(ConD); 3423 3424 op_cost(5); 3425 format %{ %} 3426 interface(CONST_INTER); 3427 %} 3428 3429 // Double Immediate one 3430 operand immDPR1() %{ 3431 predicate( UseSSE<=1 && n->getd() == 1.0 ); 3432 match(ConD); 3433 3434 
op_cost(5); 3435 format %{ %} 3436 interface(CONST_INTER); 3437 %} 3438 3439 // Double Immediate 3440 operand immDPR() %{ 3441 predicate(UseSSE<=1); 3442 match(ConD); 3443 3444 op_cost(5); 3445 format %{ %} 3446 interface(CONST_INTER); 3447 %} 3448 3449 operand immD() %{ 3450 predicate(UseSSE>=2); 3451 match(ConD); 3452 3453 op_cost(5); 3454 format %{ %} 3455 interface(CONST_INTER); 3456 %} 3457 3458 // Double Immediate zero 3459 operand immD0() %{ 3460 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3461 // bug that generates code such that NaNs compare equal to 0.0 AND do not 3462 // compare equal to -0.0. 3463 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); 3464 match(ConD); 3465 3466 format %{ %} 3467 interface(CONST_INTER); 3468 %} 3469 3470 // Float Immediate zero 3471 operand immFPR0() %{ 3472 predicate(UseSSE == 0 && n->getf() == 0.0F); 3473 match(ConF); 3474 3475 op_cost(5); 3476 format %{ %} 3477 interface(CONST_INTER); 3478 %} 3479 3480 // Float Immediate one 3481 operand immFPR1() %{ 3482 predicate(UseSSE == 0 && n->getf() == 1.0F); 3483 match(ConF); 3484 3485 op_cost(5); 3486 format %{ %} 3487 interface(CONST_INTER); 3488 %} 3489 3490 // Float Immediate 3491 operand immFPR() %{ 3492 predicate( UseSSE == 0 ); 3493 match(ConF); 3494 3495 op_cost(5); 3496 format %{ %} 3497 interface(CONST_INTER); 3498 %} 3499 3500 // Float Immediate 3501 operand immF() %{ 3502 predicate(UseSSE >= 1); 3503 match(ConF); 3504 3505 op_cost(5); 3506 format %{ %} 3507 interface(CONST_INTER); 3508 %} 3509 3510 // Float Immediate zero. 
Zero and not -0.0 3511 operand immF0() %{ 3512 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); 3513 match(ConF); 3514 3515 op_cost(5); 3516 format %{ %} 3517 interface(CONST_INTER); 3518 %} 3519 3520 // Immediates for special shifts (sign extend) 3521 3522 // Constants for increment 3523 operand immI_16() %{ 3524 predicate( n->get_int() == 16 ); 3525 match(ConI); 3526 3527 format %{ %} 3528 interface(CONST_INTER); 3529 %} 3530 3531 operand immI_24() %{ 3532 predicate( n->get_int() == 24 ); 3533 match(ConI); 3534 3535 format %{ %} 3536 interface(CONST_INTER); 3537 %} 3538 3539 // Constant for byte-wide masking 3540 operand immI_255() %{ 3541 predicate( n->get_int() == 255 ); 3542 match(ConI); 3543 3544 format %{ %} 3545 interface(CONST_INTER); 3546 %} 3547 3548 // Constant for short-wide masking 3549 operand immI_65535() %{ 3550 predicate(n->get_int() == 65535); 3551 match(ConI); 3552 3553 format %{ %} 3554 interface(CONST_INTER); 3555 %} 3556 3557 operand kReg() 3558 %{ 3559 constraint(ALLOC_IN_RC(vectmask_reg)); 3560 match(RegVectMask); 3561 format %{%} 3562 interface(REG_INTER); 3563 %} 3564 3565 // Register Operands 3566 // Integer Register 3567 operand rRegI() %{ 3568 constraint(ALLOC_IN_RC(int_reg)); 3569 match(RegI); 3570 match(xRegI); 3571 match(eAXRegI); 3572 match(eBXRegI); 3573 match(eCXRegI); 3574 match(eDXRegI); 3575 match(eDIRegI); 3576 match(eSIRegI); 3577 3578 format %{ %} 3579 interface(REG_INTER); 3580 %} 3581 3582 // Subset of Integer Register 3583 operand xRegI(rRegI reg) %{ 3584 constraint(ALLOC_IN_RC(int_x_reg)); 3585 match(reg); 3586 match(eAXRegI); 3587 match(eBXRegI); 3588 match(eCXRegI); 3589 match(eDXRegI); 3590 3591 format %{ %} 3592 interface(REG_INTER); 3593 %} 3594 3595 // Special Registers 3596 operand eAXRegI(xRegI reg) %{ 3597 constraint(ALLOC_IN_RC(eax_reg)); 3598 match(reg); 3599 match(rRegI); 3600 3601 format %{ "EAX" %} 3602 interface(REG_INTER); 3603 %} 3604 3605 // Special Registers 3606 operand eBXRegI(xRegI reg) %{ 
3607 constraint(ALLOC_IN_RC(ebx_reg)); 3608 match(reg); 3609 match(rRegI); 3610 3611 format %{ "EBX" %} 3612 interface(REG_INTER); 3613 %} 3614 3615 operand eCXRegI(xRegI reg) %{ 3616 constraint(ALLOC_IN_RC(ecx_reg)); 3617 match(reg); 3618 match(rRegI); 3619 3620 format %{ "ECX" %} 3621 interface(REG_INTER); 3622 %} 3623 3624 operand eDXRegI(xRegI reg) %{ 3625 constraint(ALLOC_IN_RC(edx_reg)); 3626 match(reg); 3627 match(rRegI); 3628 3629 format %{ "EDX" %} 3630 interface(REG_INTER); 3631 %} 3632 3633 operand eDIRegI(xRegI reg) %{ 3634 constraint(ALLOC_IN_RC(edi_reg)); 3635 match(reg); 3636 match(rRegI); 3637 3638 format %{ "EDI" %} 3639 interface(REG_INTER); 3640 %} 3641 3642 operand nadxRegI() %{ 3643 constraint(ALLOC_IN_RC(nadx_reg)); 3644 match(RegI); 3645 match(eBXRegI); 3646 match(eCXRegI); 3647 match(eSIRegI); 3648 match(eDIRegI); 3649 3650 format %{ %} 3651 interface(REG_INTER); 3652 %} 3653 3654 operand ncxRegI() %{ 3655 constraint(ALLOC_IN_RC(ncx_reg)); 3656 match(RegI); 3657 match(eAXRegI); 3658 match(eDXRegI); 3659 match(eSIRegI); 3660 match(eDIRegI); 3661 3662 format %{ %} 3663 interface(REG_INTER); 3664 %} 3665 3666 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3667 // // 3668 operand eSIRegI(xRegI reg) %{ 3669 constraint(ALLOC_IN_RC(esi_reg)); 3670 match(reg); 3671 match(rRegI); 3672 3673 format %{ "ESI" %} 3674 interface(REG_INTER); 3675 %} 3676 3677 // Pointer Register 3678 operand anyRegP() %{ 3679 constraint(ALLOC_IN_RC(any_reg)); 3680 match(RegP); 3681 match(eAXRegP); 3682 match(eBXRegP); 3683 match(eCXRegP); 3684 match(eDIRegP); 3685 match(eRegP); 3686 3687 format %{ %} 3688 interface(REG_INTER); 3689 %} 3690 3691 operand eRegP() %{ 3692 constraint(ALLOC_IN_RC(int_reg)); 3693 match(RegP); 3694 match(eAXRegP); 3695 match(eBXRegP); 3696 match(eCXRegP); 3697 match(eDIRegP); 3698 3699 format %{ %} 3700 interface(REG_INTER); 3701 %} 3702 3703 operand rRegP() %{ 3704 constraint(ALLOC_IN_RC(int_reg)); 3705 match(RegP); 
3706 match(eAXRegP); 3707 match(eBXRegP); 3708 match(eCXRegP); 3709 match(eDIRegP); 3710 3711 format %{ %} 3712 interface(REG_INTER); 3713 %} 3714 3715 // On windows95, EBP is not safe to use for implicit null tests. 3716 operand eRegP_no_EBP() %{ 3717 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3718 match(RegP); 3719 match(eAXRegP); 3720 match(eBXRegP); 3721 match(eCXRegP); 3722 match(eDIRegP); 3723 3724 op_cost(100); 3725 format %{ %} 3726 interface(REG_INTER); 3727 %} 3728 3729 operand pRegP() %{ 3730 constraint(ALLOC_IN_RC(p_reg)); 3731 match(RegP); 3732 match(eBXRegP); 3733 match(eDXRegP); 3734 match(eSIRegP); 3735 match(eDIRegP); 3736 3737 format %{ %} 3738 interface(REG_INTER); 3739 %} 3740 3741 // Special Registers 3742 // Return a pointer value 3743 operand eAXRegP(eRegP reg) %{ 3744 constraint(ALLOC_IN_RC(eax_reg)); 3745 match(reg); 3746 format %{ "EAX" %} 3747 interface(REG_INTER); 3748 %} 3749 3750 // Used in AtomicAdd 3751 operand eBXRegP(eRegP reg) %{ 3752 constraint(ALLOC_IN_RC(ebx_reg)); 3753 match(reg); 3754 format %{ "EBX" %} 3755 interface(REG_INTER); 3756 %} 3757 3758 // Tail-call (interprocedural jump) to interpreter 3759 operand eCXRegP(eRegP reg) %{ 3760 constraint(ALLOC_IN_RC(ecx_reg)); 3761 match(reg); 3762 format %{ "ECX" %} 3763 interface(REG_INTER); 3764 %} 3765 3766 operand eDXRegP(eRegP reg) %{ 3767 constraint(ALLOC_IN_RC(edx_reg)); 3768 match(reg); 3769 format %{ "EDX" %} 3770 interface(REG_INTER); 3771 %} 3772 3773 operand eSIRegP(eRegP reg) %{ 3774 constraint(ALLOC_IN_RC(esi_reg)); 3775 match(reg); 3776 format %{ "ESI" %} 3777 interface(REG_INTER); 3778 %} 3779 3780 // Used in rep stosw 3781 operand eDIRegP(eRegP reg) %{ 3782 constraint(ALLOC_IN_RC(edi_reg)); 3783 match(reg); 3784 format %{ "EDI" %} 3785 interface(REG_INTER); 3786 %} 3787 3788 operand eRegL() %{ 3789 constraint(ALLOC_IN_RC(long_reg)); 3790 match(RegL); 3791 match(eADXRegL); 3792 3793 format %{ %} 3794 interface(REG_INTER); 3795 %} 3796 3797 operand eADXRegL( eRegL reg 
) %{ 3798 constraint(ALLOC_IN_RC(eadx_reg)); 3799 match(reg); 3800 3801 format %{ "EDX:EAX" %} 3802 interface(REG_INTER); 3803 %} 3804 3805 operand eBCXRegL( eRegL reg ) %{ 3806 constraint(ALLOC_IN_RC(ebcx_reg)); 3807 match(reg); 3808 3809 format %{ "EBX:ECX" %} 3810 interface(REG_INTER); 3811 %} 3812 3813 operand eBDPRegL( eRegL reg ) %{ 3814 constraint(ALLOC_IN_RC(ebpd_reg)); 3815 match(reg); 3816 3817 format %{ "EBP:EDI" %} 3818 interface(REG_INTER); 3819 %} 3820 // Special case for integer high multiply 3821 operand eADXRegL_low_only() %{ 3822 constraint(ALLOC_IN_RC(eadx_reg)); 3823 match(RegL); 3824 3825 format %{ "EAX" %} 3826 interface(REG_INTER); 3827 %} 3828 3829 // Flags register, used as output of compare instructions 3830 operand rFlagsReg() %{ 3831 constraint(ALLOC_IN_RC(int_flags)); 3832 match(RegFlags); 3833 3834 format %{ "EFLAGS" %} 3835 interface(REG_INTER); 3836 %} 3837 3838 // Flags register, used as output of compare instructions 3839 operand eFlagsReg() %{ 3840 constraint(ALLOC_IN_RC(int_flags)); 3841 match(RegFlags); 3842 3843 format %{ "EFLAGS" %} 3844 interface(REG_INTER); 3845 %} 3846 3847 // Flags register, used as output of FLOATING POINT compare instructions 3848 operand eFlagsRegU() %{ 3849 constraint(ALLOC_IN_RC(int_flags)); 3850 match(RegFlags); 3851 3852 format %{ "EFLAGS_U" %} 3853 interface(REG_INTER); 3854 %} 3855 3856 operand eFlagsRegUCF() %{ 3857 constraint(ALLOC_IN_RC(int_flags)); 3858 match(RegFlags); 3859 predicate(false); 3860 3861 format %{ "EFLAGS_U_CF" %} 3862 interface(REG_INTER); 3863 %} 3864 3865 // Condition Code Register used by long compare 3866 operand flagsReg_long_LTGE() %{ 3867 constraint(ALLOC_IN_RC(int_flags)); 3868 match(RegFlags); 3869 format %{ "FLAGS_LTGE" %} 3870 interface(REG_INTER); 3871 %} 3872 operand flagsReg_long_EQNE() %{ 3873 constraint(ALLOC_IN_RC(int_flags)); 3874 match(RegFlags); 3875 format %{ "FLAGS_EQNE" %} 3876 interface(REG_INTER); 3877 %} 3878 operand flagsReg_long_LEGT() %{ 3879 
constraint(ALLOC_IN_RC(int_flags)); 3880 match(RegFlags); 3881 format %{ "FLAGS_LEGT" %} 3882 interface(REG_INTER); 3883 %} 3884 3885 // Condition Code Register used by unsigned long compare 3886 operand flagsReg_ulong_LTGE() %{ 3887 constraint(ALLOC_IN_RC(int_flags)); 3888 match(RegFlags); 3889 format %{ "FLAGS_U_LTGE" %} 3890 interface(REG_INTER); 3891 %} 3892 operand flagsReg_ulong_EQNE() %{ 3893 constraint(ALLOC_IN_RC(int_flags)); 3894 match(RegFlags); 3895 format %{ "FLAGS_U_EQNE" %} 3896 interface(REG_INTER); 3897 %} 3898 operand flagsReg_ulong_LEGT() %{ 3899 constraint(ALLOC_IN_RC(int_flags)); 3900 match(RegFlags); 3901 format %{ "FLAGS_U_LEGT" %} 3902 interface(REG_INTER); 3903 %} 3904 3905 // Float register operands 3906 operand regDPR() %{ 3907 predicate( UseSSE < 2 ); 3908 constraint(ALLOC_IN_RC(fp_dbl_reg)); 3909 match(RegD); 3910 match(regDPR1); 3911 match(regDPR2); 3912 format %{ %} 3913 interface(REG_INTER); 3914 %} 3915 3916 operand regDPR1(regDPR reg) %{ 3917 predicate( UseSSE < 2 ); 3918 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 3919 match(reg); 3920 format %{ "FPR1" %} 3921 interface(REG_INTER); 3922 %} 3923 3924 operand regDPR2(regDPR reg) %{ 3925 predicate( UseSSE < 2 ); 3926 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 3927 match(reg); 3928 format %{ "FPR2" %} 3929 interface(REG_INTER); 3930 %} 3931 3932 operand regnotDPR1(regDPR reg) %{ 3933 predicate( UseSSE < 2 ); 3934 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 3935 match(reg); 3936 format %{ %} 3937 interface(REG_INTER); 3938 %} 3939 3940 // Float register operands 3941 operand regFPR() %{ 3942 predicate( UseSSE < 2 ); 3943 constraint(ALLOC_IN_RC(fp_flt_reg)); 3944 match(RegF); 3945 match(regFPR1); 3946 format %{ %} 3947 interface(REG_INTER); 3948 %} 3949 3950 // Float register operands 3951 operand regFPR1(regFPR reg) %{ 3952 predicate( UseSSE < 2 ); 3953 constraint(ALLOC_IN_RC(fp_flt_reg0)); 3954 match(reg); 3955 format %{ "FPR1" %} 3956 interface(REG_INTER); 3957 %} 3958 3959 // XMM Float register 
operands 3960 operand regF() %{ 3961 predicate( UseSSE>=1 ); 3962 constraint(ALLOC_IN_RC(float_reg_legacy)); 3963 match(RegF); 3964 format %{ %} 3965 interface(REG_INTER); 3966 %} 3967 3968 operand legRegF() %{ 3969 predicate( UseSSE>=1 ); 3970 constraint(ALLOC_IN_RC(float_reg_legacy)); 3971 match(RegF); 3972 format %{ %} 3973 interface(REG_INTER); 3974 %} 3975 3976 // Float register operands 3977 operand vlRegF() %{ 3978 constraint(ALLOC_IN_RC(float_reg_vl)); 3979 match(RegF); 3980 3981 format %{ %} 3982 interface(REG_INTER); 3983 %} 3984 3985 // XMM Double register operands 3986 operand regD() %{ 3987 predicate( UseSSE>=2 ); 3988 constraint(ALLOC_IN_RC(double_reg_legacy)); 3989 match(RegD); 3990 format %{ %} 3991 interface(REG_INTER); 3992 %} 3993 3994 // Double register operands 3995 operand legRegD() %{ 3996 predicate( UseSSE>=2 ); 3997 constraint(ALLOC_IN_RC(double_reg_legacy)); 3998 match(RegD); 3999 format %{ %} 4000 interface(REG_INTER); 4001 %} 4002 4003 operand vlRegD() %{ 4004 constraint(ALLOC_IN_RC(double_reg_vl)); 4005 match(RegD); 4006 4007 format %{ %} 4008 interface(REG_INTER); 4009 %} 4010 4011 //----------Memory Operands---------------------------------------------------- 4012 // Direct Memory Operand 4013 operand direct(immP addr) %{ 4014 match(addr); 4015 4016 format %{ "[$addr]" %} 4017 interface(MEMORY_INTER) %{ 4018 base(0xFFFFFFFF); 4019 index(0x4); 4020 scale(0x0); 4021 disp($addr); 4022 %} 4023 %} 4024 4025 // Indirect Memory Operand 4026 operand indirect(eRegP reg) %{ 4027 constraint(ALLOC_IN_RC(int_reg)); 4028 match(reg); 4029 4030 format %{ "[$reg]" %} 4031 interface(MEMORY_INTER) %{ 4032 base($reg); 4033 index(0x4); 4034 scale(0x0); 4035 disp(0x0); 4036 %} 4037 %} 4038 4039 // Indirect Memory Plus Short Offset Operand 4040 operand indOffset8(eRegP reg, immI8 off) %{ 4041 match(AddP reg off); 4042 4043 format %{ "[$reg + $off]" %} 4044 interface(MEMORY_INTER) %{ 4045 base($reg); 4046 index(0x4); 4047 scale(0x0); 4048 disp($off); 4049 %} 
4050 %} 4051 4052 // Indirect Memory Plus Long Offset Operand 4053 operand indOffset32(eRegP reg, immI off) %{ 4054 match(AddP reg off); 4055 4056 format %{ "[$reg + $off]" %} 4057 interface(MEMORY_INTER) %{ 4058 base($reg); 4059 index(0x4); 4060 scale(0x0); 4061 disp($off); 4062 %} 4063 %} 4064 4065 // Indirect Memory Plus Long Offset Operand 4066 operand indOffset32X(rRegI reg, immP off) %{ 4067 match(AddP off reg); 4068 4069 format %{ "[$reg + $off]" %} 4070 interface(MEMORY_INTER) %{ 4071 base($reg); 4072 index(0x4); 4073 scale(0x0); 4074 disp($off); 4075 %} 4076 %} 4077 4078 // Indirect Memory Plus Index Register Plus Offset Operand 4079 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4080 match(AddP (AddP reg ireg) off); 4081 4082 op_cost(10); 4083 format %{"[$reg + $off + $ireg]" %} 4084 interface(MEMORY_INTER) %{ 4085 base($reg); 4086 index($ireg); 4087 scale(0x0); 4088 disp($off); 4089 %} 4090 %} 4091 4092 // Indirect Memory Plus Index Register Plus Offset Operand 4093 operand indIndex(eRegP reg, rRegI ireg) %{ 4094 match(AddP reg ireg); 4095 4096 op_cost(10); 4097 format %{"[$reg + $ireg]" %} 4098 interface(MEMORY_INTER) %{ 4099 base($reg); 4100 index($ireg); 4101 scale(0x0); 4102 disp(0x0); 4103 %} 4104 %} 4105 4106 // // ------------------------------------------------------------------------- 4107 // // 486 architecture doesn't support "scale * index + offset" with out a base 4108 // // ------------------------------------------------------------------------- 4109 // // Scaled Memory Operands 4110 // // Indirect Memory Times Scale Plus Offset Operand 4111 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4112 // match(AddP off (LShiftI ireg scale)); 4113 // 4114 // op_cost(10); 4115 // format %{"[$off + $ireg << $scale]" %} 4116 // interface(MEMORY_INTER) %{ 4117 // base(0x4); 4118 // index($ireg); 4119 // scale($scale); 4120 // disp($off); 4121 // %} 4122 // %} 4123 4124 // Indirect Memory Times Scale Plus Index Register 4125 
// Indirect Memory Times Scale Plus Index Register
// Matches [reg + ireg << scale]; scale is restricted to immI2 (0..3, the
// x86 SIB scale field).
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// Matches the fully general [reg + off + ireg << scale] addressing form.
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
// Pointer register restricted to ESI so the address never overlaps the
// destination pair of a long load (see the comment block above).
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);   // 0x4 encodes "no index register" (compare the stack-slot operands below)
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);   // No Index
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
// The five stackSlot* operands below are identical except for the stack
// register type they wrap (P/I/F/D/L).  None has a match rule: they are
// only generated internally by the matcher when a value must spill.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed compares)
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
// (only the lt/ge/le/gt tests; eq/ne need the cmpOpUCF2 fixup below).
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move (FCMOVcc encodings)
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o");     // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares: each test is mapped to its
// mirror (l<->g, le<->ge) because the compare operands are swapped.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares (mirrored, as above)
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable sized (IA-32 encoding)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
// Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation producing flags
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation producing flags
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation producing flags
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg operation
pipe_class fpu_reg(regDPR dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-mem-reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR     : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr     : S1(read);
    BR     : S3;
%}

// Allocation idiom
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE : S0(3);
    D0     : S2;
    MEM    : S3;
    ALU    : S3(2);
    dst    : S5(write);
    BR     : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0     : S0(2);
    MEM    : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided.  These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.
//               The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// They only exist to satisfy the matcher; reaching their encoding is a bug.
// Load Float
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}



// Load Double
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

//----------BSWAP-Instruction--------------------------------------------------
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Reverse the bytes of the low 16 bits, zero-extended: BSWAP moves them to
// the top, SHR brings them back down and clears the upper half.
// SHR writes EFLAGS, hence KILL cr.
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  // Fix: dropped the stray trailing "\n\t" after the final mnemonic so the
  // disassembly string no longer ends with a dangling blank continuation line.
  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Same as above but sign-extended: SAR replicates the (byte-swapped) sign bit.
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  // Fix: dropped the stray trailing "\n\t" after the final mnemonic (same
  // cosmetic defect as bytes_reverse_unsigned_short).
  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when LZCNT is unavailable: emulate via BSR.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst
(CountLeadingZerosI src));
  effect(KILL cr);

  // BSR yields the index of the highest set bit (undefined for zero input,
  // hence the JNZ guard); 31 - index is the leading-zero count.
  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
            "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);    // input was zero: pretend bit index -1 -> result 32
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

// 64-bit leading-zero count on a 32-bit VM: LZCNT the high word first;
// carry flag is set by LZCNT when the source was zero, so fall through to
// the low word and add 32 in that case.
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// BSR-based fallback for the 64-bit leading-zero count.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  // Fix: dropped the stray trailing "\n" after the final "ADD $dst, 63" line
  // so the disassembly string does not end with a dangling newline.
  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);    // both words zero: bit index -1 -> result 64
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when TZCNT is unavailable: BSF is undefined for zero input, so
// patch in 32 explicitly when the source was zero.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// 64-bit trailing-zero count: TZCNT the low word first; TZCNT sets carry
// when its source was zero, in which case count the high word and add 32.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// BSF-based fallback for the 64-bit trailing-zero count.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
predicate(!UseCountTrailingZerosInstruction); 5203 match(Set dst (CountTrailingZerosL src)); 5204 effect(TEMP dst, KILL cr); 5205 5206 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5207 "JNZ done\n\t" 5208 "BSF $dst, $src.hi\n\t" 5209 "JNZ msw_not_zero\n\t" 5210 "MOV $dst, 32\n" 5211 "msw_not_zero:\n\t" 5212 "ADD $dst, 32\n" 5213 "done:" %} 5214 ins_encode %{ 5215 Register Rdst = $dst$$Register; 5216 Register Rsrc = $src$$Register; 5217 Label msw_not_zero; 5218 Label done; 5219 __ bsfl(Rdst, Rsrc); 5220 __ jccb(Assembler::notZero, done); 5221 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5222 __ jccb(Assembler::notZero, msw_not_zero); 5223 __ movl(Rdst, BitsPerInt); 5224 __ bind(msw_not_zero); 5225 __ addl(Rdst, BitsPerInt); 5226 __ bind(done); 5227 %} 5228 ins_pipe(ialu_reg); 5229 %} 5230 5231 5232 //---------- Population Count Instructions ------------------------------------- 5233 5234 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5235 predicate(UsePopCountInstruction); 5236 match(Set dst (PopCountI src)); 5237 effect(KILL cr); 5238 5239 format %{ "POPCNT $dst, $src" %} 5240 ins_encode %{ 5241 __ popcntl($dst$$Register, $src$$Register); 5242 %} 5243 ins_pipe(ialu_reg); 5244 %} 5245 5246 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5247 predicate(UsePopCountInstruction); 5248 match(Set dst (PopCountI (LoadI mem))); 5249 effect(KILL cr); 5250 5251 format %{ "POPCNT $dst, $mem" %} 5252 ins_encode %{ 5253 __ popcntl($dst$$Register, $mem$$Address); 5254 %} 5255 ins_pipe(ialu_reg); 5256 %} 5257 5258 // Note: Long.bitCount(long) returns an int. 
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  // 64-bit popcount on a 32-bit VM: count each 32-bit half and sum.
  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    // Count the two 32-bit halves of the in-memory long and sum them.
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // A shift by 7 suffices (instead of 31) because after MOVSX8 the top
    // 24+1 MSB are already sign extended.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);   // XOR of the high word clobbers flags

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 bits of the mask are relevant after the zero-extend.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI
dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
// Matches the (x << 24) >> 24 idiom folded into a single sign-extending load.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);   // XOR of the high word clobbers flags

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// The 0xFF mask shrinks the load to a single zero-extended byte load.
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 bits of the mask are relevant after the zero-extend.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);   // replicate sign bit into the high word
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
// The 0xFF mask shrinks the load to a single zero-extended byte load.
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
// The 0xFFFF mask shrinks the load to a single zero-extended word load.
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads; little-endian layout puts the low word at disp,
    // the high word at disp + 4.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: a single 64-bit MOVSD is atomic; bounce through an XMM temp.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant targeting a GPR pair: split the XMM temp into lo/hi words.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Double
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Variant used when the upper half of the XMM register must be preserved.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( SetInstMark, LdImmI(dst, src), ClearInstMark );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// XOR is shorter than MOV reg,0 but clobbers flags, hence KILL cr.
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
format %{ "XOR $dst,$dst" %} 5875 opcode(0x33); /* + rd */ 5876 ins_encode( OpcP, RegReg( dst, dst ) ); 5877 ins_pipe( ialu_reg ); 5878 %} 5879 5880 instruct loadConP(eRegP dst, immP src) %{ 5881 match(Set dst src); 5882 5883 format %{ "MOV $dst,$src" %} 5884 opcode(0xB8); /* + rd */ 5885 ins_encode( SetInstMark, LdImmP(dst, src), ClearInstMark ); 5886 ins_pipe( ialu_reg_fat ); 5887 %} 5888 5889 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{ 5890 match(Set dst src); 5891 effect(KILL cr); 5892 ins_cost(200); 5893 format %{ "MOV $dst.lo,$src.lo\n\t" 5894 "MOV $dst.hi,$src.hi" %} 5895 opcode(0xB8); 5896 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); 5897 ins_pipe( ialu_reg_long_fat ); 5898 %} 5899 5900 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{ 5901 match(Set dst src); 5902 effect(KILL cr); 5903 ins_cost(150); 5904 format %{ "XOR $dst.lo,$dst.lo\n\t" 5905 "XOR $dst.hi,$dst.hi" %} 5906 opcode(0x33,0x33); 5907 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); 5908 ins_pipe( ialu_reg_long ); 5909 %} 5910 5911 // The instruction usage is guarded by predicate in operand immFPR(). 5912 instruct loadConFPR(regFPR dst, immFPR con) %{ 5913 match(Set dst con); 5914 ins_cost(125); 5915 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 5916 "FSTP $dst" %} 5917 ins_encode %{ 5918 __ fld_s($constantaddress($con)); 5919 __ fstp_d($dst$$reg); 5920 %} 5921 ins_pipe(fpu_reg_con); 5922 %} 5923 5924 // The instruction usage is guarded by predicate in operand immFPR0(). 5925 instruct loadConFPR0(regFPR dst, immFPR0 con) %{ 5926 match(Set dst con); 5927 ins_cost(125); 5928 format %{ "FLDZ ST\n\t" 5929 "FSTP $dst" %} 5930 ins_encode %{ 5931 __ fldz(); 5932 __ fstp_d($dst$$reg); 5933 %} 5934 ins_pipe(fpu_reg_con); 5935 %} 5936 5937 // The instruction usage is guarded by predicate in operand immFPR1(). 
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
// XMM (SSE) float constant load from the constant table.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// XORPS zeroes the XMM register without a memory access.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
// x87 double constant load from the constant table.
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// XMM (SSE2) double constant load from the constant table.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
// XORPD zeroes the XMM register without a memory access.
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Long from stack slot: two 32-bit loads (lo at $src, hi at $src+4).
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).

// No SSE => no prefetch instruction available; emit nothing.
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// 0x66 operand-size prefix turns the 32-bit MOV into a 16-bit store.
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( SetInstMark, OpcS, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
// Non-atomic two-instruction store; the atomic case is handled by the
// *_volatile variants below (see require_atomic_access predicates).
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
instruct
storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  // Only the low 32-bit half of the long is stored.
  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( SetInstMark, OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: a single 64-bit MOVSD store is atomic, bounced through
// an XMM temp from the stack slot.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant taking the long from a register pair: assemble the two
// 32-bit halves into one XMM register, then store atomically.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SetInstMark, SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32( src ), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87 path; regDPR1 keeps the value on the FP top-of-stack)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __
movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// Store Float (x87 path; regFPR1 keeps the value on the FP top-of-stack)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// Folds the double-to-float conversion into the 32-bit FST_S store.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  // The float constant is stored as its raw 32-bit pattern via integer MOV.
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot (two 32-bit stores: lo, then hi at +4)
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full StoreLoad barrier; emitted as a locked add to the stack,
// which is why the flags register is killed.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elided when a preceding locked instruction already provides the
// StoreLoad ordering (see Matcher::post_store_load_barrier).
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// Both operands are constrained to EAX, so no code is emitted.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
// Jump-around emulation for CPUs without CMOV support.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned-compare variant of the jump-around cmove emulation.
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Real CMOV, signed compare flags.
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Real CMOV, unsigned compare flags.
instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// UCF flags variant; expands to the unsigned form above.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// UCF flags variant; expands to the unsigned memory form above.
instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct
cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
// x87 FCMOV for doubles; dst is constrained to the FP top-of-stack (regDPR1).
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
// x87 FCMOV for floats; dst is constrained to the FP top-of-stack (regFPR1).
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// So for signed compares emit a conditional branch around the FP move.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// UCF flags variant; expands to the unsigned form above.
instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// UCF flags variant; expands to the unsigned form above.
instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop,
cr, dst, src);
  %}
%}

// Long conditional move: one CMOV per 32-bit half, same condition twice.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// UCF flags variant; expands to the unsigned form above.
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Add immediate; OpcSErm picks the 8-bit sign-extended form when it fits.
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe(
ialu_reg ); 6941 %} 6942 6943 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{ 6944 predicate(UseIncDec); 6945 match(Set dst (AddI dst src)); 6946 effect(KILL cr); 6947 6948 size(1); 6949 format %{ "INC $dst" %} 6950 opcode(0x40); /* */ 6951 ins_encode( Opc_plus( primary, dst ) ); 6952 ins_pipe( ialu_reg ); 6953 %} 6954 6955 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ 6956 match(Set dst (AddI src0 src1)); 6957 ins_cost(110); 6958 6959 format %{ "LEA $dst,[$src0 + $src1]" %} 6960 opcode(0x8D); /* 0x8D /r */ 6961 ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark ); 6962 ins_pipe( ialu_reg_reg ); 6963 %} 6964 6965 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ 6966 match(Set dst (AddP src0 src1)); 6967 ins_cost(110); 6968 6969 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} 6970 opcode(0x8D); /* 0x8D /r */ 6971 ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark ); 6972 ins_pipe( ialu_reg_reg ); 6973 %} 6974 6975 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ 6976 predicate(UseIncDec); 6977 match(Set dst (AddI dst src)); 6978 effect(KILL cr); 6979 6980 size(1); 6981 format %{ "DEC $dst" %} 6982 opcode(0x48); /* */ 6983 ins_encode( Opc_plus( primary, dst ) ); 6984 ins_pipe( ialu_reg ); 6985 %} 6986 6987 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ 6988 match(Set dst (AddP dst src)); 6989 effect(KILL cr); 6990 6991 size(2); 6992 format %{ "ADD $dst,$src" %} 6993 opcode(0x03); 6994 ins_encode( OpcP, RegReg( dst, src) ); 6995 ins_pipe( ialu_reg_reg ); 6996 %} 6997 6998 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ 6999 match(Set dst (AddP dst src)); 7000 effect(KILL cr); 7001 7002 format %{ "ADD $dst,$src" %} 7003 opcode(0x81,0x00); /* Opcode 81 /0 id */ 7004 // ins_encode( RegImm( dst, src) ); 7005 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7006 ins_pipe( ialu_reg ); 7007 %} 7008 7009 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg 
cr) %{ 7010 match(Set dst (AddI dst (LoadI src))); 7011 effect(KILL cr); 7012 7013 ins_cost(150); 7014 format %{ "ADD $dst,$src" %} 7015 opcode(0x03); 7016 ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark ); 7017 ins_pipe( ialu_reg_mem ); 7018 %} 7019 7020 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7021 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7022 effect(KILL cr); 7023 7024 ins_cost(150); 7025 format %{ "ADD $dst,$src" %} 7026 opcode(0x01); /* Opcode 01 /r */ 7027 ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); 7028 ins_pipe( ialu_mem_reg ); 7029 %} 7030 7031 // Add Memory with Immediate 7032 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7033 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7034 effect(KILL cr); 7035 7036 ins_cost(125); 7037 format %{ "ADD $dst,$src" %} 7038 opcode(0x81); /* Opcode 81 /0 id */ 7039 ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32(src), ClearInstMark ); 7040 ins_pipe( ialu_mem_imm ); 7041 %} 7042 7043 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{ 7044 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7045 effect(KILL cr); 7046 7047 ins_cost(125); 7048 format %{ "INC $dst" %} 7049 opcode(0xFF); /* Opcode FF /0 */ 7050 ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,dst), ClearInstMark); 7051 ins_pipe( ialu_mem_imm ); 7052 %} 7053 7054 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7055 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7056 effect(KILL cr); 7057 7058 ins_cost(125); 7059 format %{ "DEC $dst" %} 7060 opcode(0xFF); /* Opcode FF /1 */ 7061 ins_encode( SetInstMark, OpcP, RMopc_Mem(0x01,dst), ClearInstMark); 7062 ins_pipe( ialu_mem_imm ); 7063 %} 7064 7065 7066 instruct checkCastPP( eRegP dst ) %{ 7067 match(Set dst (CheckCastPP dst)); 7068 7069 size(0); 7070 format %{ "#checkcastPP of $dst" %} 7071 ins_encode( /*empty encoding*/ ); 7072 ins_pipe( empty ); 7073 %} 7074 7075 instruct castPP( 
eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastII pins a narrowed type in the IR; emits no code.
instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// Long flavor of the zero-size cast.
instruct castLL( eRegL dst ) %{
  match(Set dst (CastLL dst));
  format %{ "#castLL of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// Float cast placeholder for the SSE register file (UseSSE >= 1).
instruct castFF( regF dst ) %{
  predicate(UseSSE >= 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// Double cast placeholder for the SSE2 register file (UseSSE >= 2).
instruct castDD( regD dst ) %{
  predicate(UseSSE >= 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// x87 float cast placeholder, used when SSE is unavailable.
instruct castFF_PR( regFPR dst ) %{
  predicate(UseSSE < 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// x87 double cast placeholder, used when SSE2 is unavailable.
instruct castDD_PR( regDPR dst ) %{
  predicate(UseSSE < 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS via CMPXCHG8B (fixed EDX:EAX old, EBX:ECX new, ESI pointer);
// res is the boolean success flag recovered from ZF.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS via CMPXCHG (EAX holds oldval); res is the success flag.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte CAS (CMPXCHGB); res is the success flag.
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Short CAS (CMPXCHGW); res is the success flag.
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int CAS (CMPXCHG); res is the success flag.
instruct compareAndSwapI( rRegI res, pRegP
mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// 64-bit compare-and-exchange: the witnessed memory value is returned in
// oldval (EDX:EAX) rather than a boolean.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer compare-and-exchange: witnessed value left in oldval (EAX).
instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte compare-and-exchange.
instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Short compare-and-exchange.
instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store
$newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Int compare-and-exchange.
instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddB whose result is unused: a plain LOCK ADDB suffices (no XADD).
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddS whose result is unused: plain LOCK 16-bit add.
instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDS [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add, 16-bit (LOCK XADD).
instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDS [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddI whose result is unused: plain LOCK 32-bit add.
instruct xaddI_no_res( memory mem, Universe dummy, immI add,
eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add, 32-bit (LOCK XADD).
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
// Note: no explicit LOCK prefix here — register-memory XCHG is implicitly
// locked by the x86 ISA.
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange, 16-bit.
instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange, 32-bit.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange of a pointer.
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB
$dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract immediate from register (81 /5; short form via Con8or32).
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract a loaded memory operand from a register.
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write: subtract a register from a memory word.
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Two's-complement negation: matches the 0 - dst pattern.
instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr)
%{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F); // two-byte IMUL r,r/m: 0F AF (secondary listed first for OpcS/OpcP order)
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low word (EAX) of the EDX:EAX pair;
// helper pattern for the high-word multiplies below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Only fires when the long multiplicand is a constant that fits in 32 bits.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high
order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Same constant-fits-in-32-bits constraint as mulI_imm_high.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( SetInstMark, OpcSE(imm), RegMem( dst, src ), Con8or32( imm ), ClearInstMark );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( SetInstMark, OpcS, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// dst + src1*... : expands into two register multiplies plus an add.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
            mulI_eReg(src2, src3, cr);
            addI_eReg(dst, src2, cr); %}
%}

// Multiply Register Int to Long
instruct
mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned 32x32->64 multiply of two zero-extended ints.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    // tmp = lo(src.hi * EAX); MUL then forms EDX:EAX = EAX * src.lo and
    // the cross term is folded into the high half.
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// The CMP/JNE prologue skips IDIV for min_jint / -1, which would raise #DE;
// that case yields quotient min_jint with EDX zeroed.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// Long division is a runtime call on 32-bit x86.
instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (DivL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// Like divL_eReg, long remainder is a runtime call.
instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (ModL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ?
con : -con;
    Label Lfast, Lpos, Ldone;

    // Divide by the absolute value; sign is fixed up at the end.
    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      // Negative divisor: negate the quotient.
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    // Remainder of |dividend| by |divisor|; sign follows the dividend.
    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    // Remainder is in EDX; move it to the low word and sign-extend the high word.
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift,
eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
// Variable shift count is implicitly taken from CL (eCXRegI operand).
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
// Memory form: shift the word in place.
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary,dst), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( SetInstMark,
OpcP, RMopc_Mem(secondary, dst ), Con8or32(shift), ClearInstMark ); 7941 ins_pipe( ialu_mem_imm ); 7942 %} 7943 7944 // Arithmetic Shift Right by variable 7945 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7946 match(Set dst (RShiftI dst shift)); 7947 effect(KILL cr); 7948 7949 size(2); 7950 format %{ "SAR $dst,$shift" %} 7951 opcode(0xD3, 0x7); /* D3 /7 */ 7952 ins_encode( OpcP, RegOpc( dst ) ); 7953 ins_pipe( ialu_reg_reg ); 7954 %} 7955 7956 // Logical shift right by one 7957 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 7958 match(Set dst (URShiftI dst shift)); 7959 effect(KILL cr); 7960 7961 size(2); 7962 format %{ "SHR $dst,$shift" %} 7963 opcode(0xD1, 0x5); /* D1 /5 */ 7964 ins_encode( OpcP, RegOpc( dst ) ); 7965 ins_pipe( ialu_reg ); 7966 %} 7967 7968 // Logical Shift Right by 8-bit immediate 7969 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7970 match(Set dst (URShiftI dst shift)); 7971 effect(KILL cr); 7972 7973 size(3); 7974 format %{ "SHR $dst,$shift" %} 7975 opcode(0xC1, 0x5); /* C1 /5 ib */ 7976 ins_encode( RegOpcImm( dst, shift) ); 7977 ins_pipe( ialu_reg ); 7978 %} 7979 7980 7981 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. 7982 // This idiom is used by the compiler for the i2b bytecode. 7983 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ 7984 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); 7985 7986 size(3); 7987 format %{ "MOVSX $dst,$src :8" %} 7988 ins_encode %{ 7989 __ movsbl($dst$$Register, $src$$Register); 7990 %} 7991 ins_pipe(ialu_reg_reg); 7992 %} 7993 7994 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. 7995 // This idiom is used by the compiler the i2s bytecode. 
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06); /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( SetInstMark, OpcP, RegMem(dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6); /* Opcode 81 /6 id */
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // Branch-free: dst = (p < q) ? -1 : 0 via SETcc + NEG.
    // (Removed an unused 'Label done' that was never bound or jumped to.)
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Integer Absolute Instructions
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
  %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);
    __ subl($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

//----------Long Instructions------------------------------------------------
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
// Both 32-bit halves are OR'ed with the corresponding immediate half.
// NOTE(review): Long_OpcSErm_* presumably selects the sign-extended
// immediate form for small constants — confirm in the encode classes.
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
// Low half from $mem, high half from $mem+4.
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// x ^ -1 is bitwise NOT; uses NOT on both halves and leaves flags alone
// (hence no KILL cr effect here, unlike the general immediate form).
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
// Small constant left shifts are strength-reduced to ADD/ADC chains on the
// register pair: ADD doubles the low word, ADC propagates the carry into
// the high word.  Gated by the UseNewLongLShift flag.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
// Two ADD/ADC doubling steps (see shlL_eReg_1).
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
// Three ADD/ADC doubling steps (see shlL_eReg_1).
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
// SHLD shifts bits from the low word into the high word, then SHL shifts
// the low word itself.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
// The low word moves wholesale into the high word (shifted by cnt-32)
// and the low word is zeroed.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
// Runtime count in ECX: bit 5 of the count decides whether the >=32 word
// move is needed before the SHLD/SHL pair (hardware shifts use count mod 32).
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
// Logical (unsigned) right shift: SHRD moves high-word bits into the low
// word, SHR zero-fills the high word.
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
// Arithmetic right shift: like shrL_eReg_1_31 but SAR preserves the sign
// bit in the high word.
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
// SAR $dst.hi,31 replicates the sign bit across the whole high word.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}


//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
// Uses FUCOMIP (requires CMOV support); the SAHF fixup forces the
// unordered (NaN) result to look like "less than" by setting CF.
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// As above, but for the CF-only flags register class: no NaN fixup emitted.
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
// Pre-P6 fallback: FCOMp + FNSTSW AX + SAHF; unordered is treated as LT.
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// UCOMISD leaves PF set on unordered; emit_cmpfp_fixup rewrites the flags
// so a NaN compare reads as "below" (CF set).
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe( pipe_slow );
%}

// CF-only consumer: no NaN fixup needed.
instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand variant of cmpD_cc: second operand loaded from memory.
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
// emit_cmpfp3 materializes the three-way result in an integer register.
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// x87 double subtract: push src, then DSUBp pops it into dst.
instruct subDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Subtract with an explicit store/round to a stack slot (D-round).
instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DSUB ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


instruct subDPR_reg_mem(regDPR dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// FABS operates implicitly on the top of the FPU stack (regDPR1).
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// FCHS operates implicitly on the top of the FPU stack (regDPR1).
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
// Add with an explicit store/round to a stack slot (D-round).
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DADD ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DADDp $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory
// Read-modify-write: load the memory operand, add the register, store back.
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D $dst\n\t"
            "DADD ST,$src\n\t"
            "FST_D $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( SetInstMark, Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src), ClearInstMark,
              SetInstMark,
              Opcode(0xDD), RMopc_Mem(0x03,dst),
              ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// Adding the constant 1.0 uses FLD1 instead of a constant-table load.
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// General double constant: excluded 0.0 and 1.0 are handled by cheaper rules.
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}

instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (MulD dst src));
  ins_cost(1); // Select this instruction for all FP double multiplies

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Multiply by a double constant other than 0.0 / 1.0 (those have other rules).
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}


instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_reg_mem );
%}


// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // Single predicate: the original carried a second, weaker
  // predicate(UseSSE<=1) line that is implied by this conjunction.
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  // Select this instruction for all FP double divides when compiling a
  // method.  (Was written as the octal literal '01' — same value, decimal
  // form avoids the octal trap and matches strictfp_mulDPR_reg.)
  ins_cost(1);

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// x87 double remainder via the emitModDPR() helper.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE2 double remainder: operands are bounced through the stack into the
// x87 unit, FPREM is iterated until complete, result moved back to XMM.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
            "\tMOVSD [ESP+0],$src1\n"
            "\tFLD_D [ESP+0]\n"
            "\tMOVSD [ESP+0],$src0\n"
            "\tFLD_D [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_D [ESP+0]\n"
            "\tMOVSD $dst,[ESP+0]\n"
            "\tADD ESP,8\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

// x87 arctangent (FPATAN, D9 F3).
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

// SSE2 arctangent: bounced through the x87 unit via the stack.
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

// x87 square root (FSQRT, D9 FA).
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}

//-------------Float Instructions-------------------------------
// Float Math

// Code for float compare:
//     fcompp();
//     fwait(); fnstsw_ax();
//     sahf();
//     movl(dst, unordered_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);
//   exit:

// P6 version of float compare, sets condition codes in EFLAGS
// Mirrors cmpDPR_cc_P6 but for single-precision operands; unordered
// (NaN) is forced to read as "less than" by setting CF via SAHF.
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// CF-only flags consumer: no NaN fixup emitted.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
// Pre-P6 fallback: FCOMp + FNSTSW AX + SAHF; unordered treated as LT.
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// UCOMISS leaves PF set on unordered; emit_cmpfp_fixup rewrites the flags
// so a NaN compare reads as "below" (CF set).
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe( pipe_slow );
%}

// CF-only flags consumer: no NaN fixup needed.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
// The 24-bit (single-precision) rounding variants store through a stack
// slot so the x87 result is narrowed; selected by select_24_bit_instr().
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// FABS acts implicitly on the top of the FPU stack (regFPR1).
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// FCHS acts implicitly on the top of the FPU stack (regFPR1).
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark);
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
Pop_Mem_FPR(dst), 10216 ClearInstMark); 10217 ins_pipe( fpu_mem_mem_mem ); 10218 %} 10219 10220 10221 // Spill to obtain 24-bit precision 10222 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10223 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10224 match(Set dst (AddF src con)); 10225 format %{ "FLD $src\n\t" 10226 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10227 "FSTP_S $dst" %} 10228 ins_encode %{ 10229 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10230 __ fadd_s($constantaddress($con)); 10231 __ fstp_s(Address(rsp, $dst$$disp)); 10232 %} 10233 ins_pipe(fpu_mem_reg_con); 10234 %} 10235 // 10236 // This instruction does not round to 24-bits 10237 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10238 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10239 match(Set dst (AddF src con)); 10240 format %{ "FLD $src\n\t" 10241 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10242 "FSTP $dst" %} 10243 ins_encode %{ 10244 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10245 __ fadd_s($constantaddress($con)); 10246 __ fstp_d($dst$$reg); 10247 %} 10248 ins_pipe(fpu_reg_reg_con); 10249 %} 10250 10251 // Spill to obtain 24-bit precision 10252 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10253 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10254 match(Set dst (MulF src1 src2)); 10255 10256 format %{ "FLD $src1\n\t" 10257 "FMUL $src2\n\t" 10258 "FSTP_S $dst" %} 10259 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10260 ins_encode( Push_Reg_FPR(src1), 10261 OpcReg_FPR(src2), 10262 Pop_Mem_FPR(dst) ); 10263 ins_pipe( fpu_mem_reg_reg ); 10264 %} 10265 // 10266 // This instruction does not round to 24-bits 10267 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10268 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10269 match(Set dst (MulF src1 src2)); 10270 10271 format %{ "FLD 
$src1\n\t" 10272 "FMUL $src2\n\t" 10273 "FSTP_S $dst" %} 10274 opcode(0xD8, 0x1); /* D8 C8+i */ 10275 ins_encode( Push_Reg_FPR(src2), 10276 OpcReg_FPR(src1), 10277 Pop_Reg_FPR(dst) ); 10278 ins_pipe( fpu_reg_reg_reg ); 10279 %} 10280 10281 10282 // Spill to obtain 24-bit precision 10283 // Cisc-alternate to reg-reg multiply 10284 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10285 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10286 match(Set dst (MulF src1 (LoadF src2))); 10287 10288 format %{ "FLD_S $src2\n\t" 10289 "FMUL $src1\n\t" 10290 "FSTP_S $dst" %} 10291 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10292 ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2), 10293 OpcReg_FPR(src1), 10294 Pop_Mem_FPR(dst), ClearInstMark ); 10295 ins_pipe( fpu_mem_reg_mem ); 10296 %} 10297 // 10298 // This instruction does not round to 24-bits 10299 // Cisc-alternate to reg-reg multiply 10300 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10301 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10302 match(Set dst (MulF src1 (LoadF src2))); 10303 10304 format %{ "FMUL $dst,$src1,$src2" %} 10305 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10306 ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2), 10307 OpcReg_FPR(src1), 10308 Pop_Reg_FPR(dst), ClearInstMark ); 10309 ins_pipe( fpu_reg_reg_mem ); 10310 %} 10311 10312 // Spill to obtain 24-bit precision 10313 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10314 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10315 match(Set dst (MulF src1 src2)); 10316 10317 format %{ "FMUL $dst,$src1,$src2" %} 10318 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10319 ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2), 10320 OpcP, RMopc_Mem(secondary,src1), 10321 Pop_Mem_FPR(dst), 10322 ClearInstMark ); 10323 ins_pipe( fpu_mem_mem_mem ); 10324 %} 
// Spill to obtain 24-bit precision
// Multiply by a float constant loaded from the constant table.
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
// Float remainder via the FPREM-based emitModDPR() helper.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder: bounce both operands through the stack onto the
// x87 stack, loop on FPREM until complete, then move the result back.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all Alpha sorted. Please keep it that way!

// Round an x87 float by storing it to a stack slot as single precision.
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round an x87 value by storing it to a stack slot as double precision.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
// x87 double -> SSE float: round through a stack slot, then MOVSS into XMM.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      // Source is not already TOS: load it, then store-and-pop.
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      // Source is TOS: store without popping.
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// x87 float -> x87 double: a register-to-register move widens for free.
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// x87 float -> double result placed in a stack slot (UseSSE==1 mixed mode).
instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// SSE float -> x87 double: bounce the XMM value through the stack.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 float -> double conversion.
instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSD2SI yields 0x80000000 for overflow/NaN; only then take the
    // slow path through the d2i_wrapper stub for exact Java semantics.
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 double -> long, slow path through d2l_wrapper on overflow/NaN.
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // FIST stores 0x8000000000000000 on overflow/NaN; only that value
    // needs the d2l_wrapper slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
// SSE float -> int; slow path through d2i_wrapper on overflow/NaN.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSS2SI yields 0x80000000 for overflow/NaN; only then call out.
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 float -> long; slow path through d2l_wrapper on overflow/NaN.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 is the FIST overflow/NaN sentinel; re-check in
    // the d2l_wrapper stub only in that case.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// int (stack slot) -> x87 double via FILD.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// int -> double via CVTSI2SD (scalar conversion path).
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// int load folded directly into the conversion.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// int -> double via MOVD + CVTDQ2PD (preferred when UseXmmI2D is set).
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// int load folded into x87 FILD (non-24-bit mode only).
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// Matches only when the int input is masked to 8 bits (AndI with 255),
// so the value is exactly representable and needs no 24-bit rounding.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst), ClearInstMark);
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F ));
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// int -> float via MOVD + CVTDQ2PS (preferred when UseXmmI2F is set).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy to both halves, arithmetic-shift the hi half.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// long -> x87 double: push both halves and FILD the 64-bit integer.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// long -> SSE double: FILD on the FPU, then move the result into XMM.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// long -> SSE float: FILD on the FPU, store single, move into XMM.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// long -> x87 float (stack-slot result); no UseSSE predicate, so this is
// the fallback when the SSE forms above do not apply.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// long -> int: just copy the low half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Reinterpret float bits as int: plain 32-bit load from the stack slot.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// Reinterpret x87 float bits as int by storing to a stack slot.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Reinterpret SSE float bits as int by storing to a stack slot.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret SSE float bits as int directly via MOVD (SSE2).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src
); 11172 11173 ins_cost(100); 11174 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} 11175 ins_encode %{ 11176 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11177 %} 11178 ins_pipe( ialu_mem_reg ); 11179 %} 11180 11181 11182 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11183 predicate(UseSSE==0); 11184 match(Set dst (MoveI2F src)); 11185 effect(DEF dst, USE src); 11186 11187 ins_cost(125); 11188 format %{ "FLD_S $src\n\t" 11189 "FSTP $dst\t# MoveI2F_stack_reg" %} 11190 opcode(0xD9); /* D9 /0, FLD m32real */ 11191 ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src), 11192 Pop_Reg_FPR(dst), ClearInstMark ); 11193 ins_pipe( fpu_reg_mem ); 11194 %} 11195 11196 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11197 predicate(UseSSE>=1); 11198 match(Set dst (MoveI2F src)); 11199 effect( DEF dst, USE src ); 11200 11201 ins_cost(95); 11202 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11203 ins_encode %{ 11204 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11205 %} 11206 ins_pipe( pipe_slow ); 11207 %} 11208 11209 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11210 predicate(UseSSE>=2); 11211 match(Set dst (MoveI2F src)); 11212 effect( DEF dst, USE src ); 11213 11214 ins_cost(85); 11215 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11216 ins_encode %{ 11217 __ movdl($dst$$XMMRegister, $src$$Register); 11218 %} 11219 ins_pipe( pipe_slow ); 11220 %} 11221 11222 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11223 match(Set dst (MoveD2L src)); 11224 effect(DEF dst, USE src); 11225 11226 ins_cost(250); 11227 format %{ "MOV $dst.lo,$src\n\t" 11228 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11229 opcode(0x8B, 0x8B); 11230 ins_encode( SetInstMark, OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src), ClearInstMark); 11231 ins_pipe( ialu_mem_long_reg ); 11232 %} 11233 11234 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11235 predicate(UseSSE<=1); 11236 match(Set dst (MoveD2L src)); 11237 
effect(DEF dst, USE src); 11238 11239 ins_cost(125); 11240 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11241 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11242 ins_pipe( fpu_mem_reg ); 11243 %} 11244 11245 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11246 predicate(UseSSE>=2); 11247 match(Set dst (MoveD2L src)); 11248 effect(DEF dst, USE src); 11249 ins_cost(95); 11250 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11251 ins_encode %{ 11252 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11253 %} 11254 ins_pipe( pipe_slow ); 11255 %} 11256 11257 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11258 predicate(UseSSE>=2); 11259 match(Set dst (MoveD2L src)); 11260 effect(DEF dst, USE src, TEMP tmp); 11261 ins_cost(85); 11262 format %{ "MOVD $dst.lo,$src\n\t" 11263 "PSHUFLW $tmp,$src,0x4E\n\t" 11264 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11265 ins_encode %{ 11266 __ movdl($dst$$Register, $src$$XMMRegister); 11267 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11268 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11269 %} 11270 ins_pipe( pipe_slow ); 11271 %} 11272 11273 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11274 match(Set dst (MoveL2D src)); 11275 effect(DEF dst, USE src); 11276 11277 ins_cost(200); 11278 format %{ "MOV $dst,$src.lo\n\t" 11279 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11280 opcode(0x89, 0x89); 11281 ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark ); 11282 ins_pipe( ialu_mem_long_reg ); 11283 %} 11284 11285 11286 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11287 predicate(UseSSE<=1); 11288 match(Set dst (MoveL2D src)); 11289 effect(DEF dst, USE src); 11290 ins_cost(125); 11291 11292 format %{ "FLD_D $src\n\t" 11293 "FSTP $dst\t# MoveL2D_stack_reg" %} 11294 opcode(0xDD); /* DD /0, FLD m64real */ 11295 ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src), 11296 Pop_Reg_DPR(dst), 
ClearInstMark ); 11297 ins_pipe( fpu_reg_mem ); 11298 %} 11299 11300 11301 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11302 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 11303 match(Set dst (MoveL2D src)); 11304 effect(DEF dst, USE src); 11305 11306 ins_cost(95); 11307 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11308 ins_encode %{ 11309 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11310 %} 11311 ins_pipe( pipe_slow ); 11312 %} 11313 11314 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11315 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11316 match(Set dst (MoveL2D src)); 11317 effect(DEF dst, USE src); 11318 11319 ins_cost(95); 11320 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11321 ins_encode %{ 11322 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11323 %} 11324 ins_pipe( pipe_slow ); 11325 %} 11326 11327 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11328 predicate(UseSSE>=2); 11329 match(Set dst (MoveL2D src)); 11330 effect(TEMP dst, USE src, TEMP tmp); 11331 ins_cost(85); 11332 format %{ "MOVD $dst,$src.lo\n\t" 11333 "MOVD $tmp,$src.hi\n\t" 11334 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11335 ins_encode %{ 11336 __ movdl($dst$$XMMRegister, $src$$Register); 11337 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11338 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11339 %} 11340 ins_pipe( pipe_slow ); 11341 %} 11342 11343 //----------------------------- CompressBits/ExpandBits ------------------------ 11344 11345 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ 11346 predicate(n->bottom_type()->isa_long()); 11347 match(Set dst (CompressBits src mask)); 11348 effect(TEMP rtmp, TEMP xtmp, KILL cr); 11349 format %{ "compress_bits $dst, $src, $mask\t! 
using $rtmp and $xtmp as TEMP" %} 11350 ins_encode %{ 11351 Label exit, partail_result; 11352 // Parallely extract both upper and lower 32 bits of source into destination register pair. 11353 // Merge the results of upper and lower destination registers such that upper destination 11354 // results are contiguously laid out after the lower destination result. 11355 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 11356 __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); 11357 __ popcntl($rtmp$$Register, $mask$$Register); 11358 // Skip merging if bit count of lower mask register is equal to 32 (register size). 11359 __ cmpl($rtmp$$Register, 32); 11360 __ jccb(Assembler::equal, exit); 11361 // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. 11362 __ movdl($xtmp$$XMMRegister, $rtmp$$Register); 11363 // Shift left the contents of upper destination register by true bit count of lower mask register 11364 // and merge with lower destination register. 11365 __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11366 __ orl($dst$$Register, $rtmp$$Register); 11367 __ movdl($rtmp$$Register, $xtmp$$XMMRegister); 11368 // Zero out upper destination register if true bit count of lower 32 bit mask is zero 11369 // since contents of upper destination have already been copied to lower destination 11370 // register. 11371 __ cmpl($rtmp$$Register, 0); 11372 __ jccb(Assembler::greater, partail_result); 11373 __ movl(HIGH_FROM_LOW($dst$$Register), 0); 11374 __ jmp(exit); 11375 __ bind(partail_result); 11376 // Perform right shift over upper destination register to move out bits already copied 11377 // to lower destination register. 
11378 __ subl($rtmp$$Register, 32); 11379 __ negl($rtmp$$Register); 11380 __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11381 __ bind(exit); 11382 %} 11383 ins_pipe( pipe_slow ); 11384 %} 11385 11386 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ 11387 predicate(n->bottom_type()->isa_long()); 11388 match(Set dst (ExpandBits src mask)); 11389 effect(TEMP rtmp, TEMP xtmp, KILL cr); 11390 format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %} 11391 ins_encode %{ 11392 // Extraction operation sequentially reads the bits from source register starting from LSB 11393 // and lays them out into destination register at bit locations corresponding to true bits 11394 // in mask register. Thus number of source bits read are equal to combined true bit count 11395 // of mask register pair. 11396 Label exit, mask_clipping; 11397 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 11398 __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); 11399 __ popcntl($rtmp$$Register, $mask$$Register); 11400 // If true bit count of lower mask register is 32 then none of bit of lower source register 11401 // will feed to upper destination register. 11402 __ cmpl($rtmp$$Register, 32); 11403 __ jccb(Assembler::equal, exit); 11404 // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. 11405 __ movdl($xtmp$$XMMRegister, $rtmp$$Register); 11406 // Shift right the contents of lower source register to remove already consumed bits. 11407 __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register); 11408 // Extract the bits from lower source register starting from LSB under the influence 11409 // of upper mask register. 
11410 __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register)); 11411 __ movdl($rtmp$$Register, $xtmp$$XMMRegister); 11412 __ subl($rtmp$$Register, 32); 11413 __ negl($rtmp$$Register); 11414 __ movdl($xtmp$$XMMRegister, $mask$$Register); 11415 __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register)); 11416 // Clear the set bits in upper mask register which have been used to extract the contents 11417 // from lower source register. 11418 __ bind(mask_clipping); 11419 __ blsrl($mask$$Register, $mask$$Register); 11420 __ decrementl($rtmp$$Register, 1); 11421 __ jccb(Assembler::greater, mask_clipping); 11422 // Starting from LSB extract the bits from upper source register under the influence of 11423 // remaining set bits in upper mask register. 11424 __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register); 11425 // Merge the partial results extracted from lower and upper source register bits. 11426 __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11427 __ movdl($mask$$Register, $xtmp$$XMMRegister); 11428 __ bind(exit); 11429 %} 11430 ins_pipe( pipe_slow ); 11431 %} 11432 11433 // ======================================================================= 11434 // Fast clearing of an array 11435 // Small non-constant length ClearArray for non-AVX512 targets. 
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV RDI,RAX\n\t"
       $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD 0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB 0x8,RCX\n\t"
       $$emit$$"JGE L_loop\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JL L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD 0x20,RAX\n\t"
       $$emit$$"SUB 0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JLE L_end\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ XMM0,(RAX)\n\t"
       $$emit$$"ADD 0x8,RAX\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"JGE L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  // is_large = false, no AVX512 mask register available (knoreg).
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Small non-constant length ClearArray for AVX512 targets.
// Same shape as rep_stos above, but passes an AVX512 mask register (ktmp)
// through to clear_mem.
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV RDI,RAX\n\t"
       $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD 0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB 0x8,RCX\n\t"
       $$emit$$"JGE L_loop\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JL L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD 0x20,RAX\n\t"
       $$emit$$"SUB 0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JLE L_end\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ XMM0,(RAX)\n\t"
       $$emit$$"ADD 0x8,RAX\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"JGE L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Large non-constant length ClearArray for non-AVX512 targets.
// is_large = true: no short-length prologue, goes straight to the bulk loop.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
       $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD 0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB 0x8,RCX\n\t"
       $$emit$$"JGE L_loop\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JL L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD 0x20,RAX\n\t"
       $$emit$$"SUB 0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JLE L_end\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ XMM0,(RAX)\n\t"
       $$emit$$"ADD 0x8,RAX\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"JGE L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
       $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD 0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB 0x8,RCX\n\t"
       $$emit$$"JGE L_loop\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JL L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD 0x20,RAX\n\t"
       $$emit$$"SUB 0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JLE L_end\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ XMM0,(RAX)\n\t"
       $$emit$$"ADD 0x8,RAX\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"JGE L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Small constant length ClearArray for AVX512 targets.
// cnt is a compile-time constant here ($cnt$$constant), so a different
// clear_mem overload is used.
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// String compare, both operands byte[] (encoding LL), non-AVX512 path.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// As string_compareL, but with an AVX512 mask register temp.
instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, both operands char[] (encoding UU), non-AVX512 path.
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Mixed-encoding compare (LU), non-AVX512 path.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Mixed-encoding compare (UL). Note: the operands are deliberately passed to
// string_compare in swapped order (str2/cnt2 first), and the register
// assignments (eSIRegP str1 / eDIRegP str2) are mirrored versus the other
// variants.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // first arg false = equality only (no array-header handling); last bool
    // false = element size is byte, not char.
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe( pipe_slow );
%}

instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}


// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    // UU threshold is 8 (chars) versus 16 for LL (bytes): same 16-byte
    // SSE4.2 register width in both cases.
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// Mixed (UL) encoding with a constant substring length; same shape as the
// conU rule above (threshold 8) but passes StrIntrinsicNode::UL to the stubs.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf with a runtime substring length (cnt2 in a register); the constant
// -1 tells the stub the length is not known at compile time.  Latin-1 (LL).
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// Runtime-length IndexOf, UTF-16 (UU) encoding.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// Runtime-length IndexOf, mixed (UL) encoding.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Single-character IndexOf over a UTF-16 string (StrIntrinsicNode::U).
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                             eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Single-character IndexOf over a Latin-1 string (StrIntrinsicNode::L).
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// fast array equals
// byte[] arrays (AryEq, LL encoding); first arrays_equals argument is true
// (array flavor, length loaded by the stub), char=false selects byte elements.
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 variant of array_equalsB (adds TEMP opmask register).
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// char[] arrays (AryEq, UU encoding); char=true selects 16-bit elements.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 variant of array_equalsC.
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Count positive bytes in a byte[] (CountPositives); non-AVX-512/BMI2 path,
// so both opmask slots are knoreg.
instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                         regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 + BMI2 variant of count_positives: two TEMP opmask registers.
instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
                              regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}


// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                         regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 + BMI2 variant of string_compress: two TEMP opmask registers.
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                              regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
// No integer result (Universe dummy) — the node is matched for its copy
// side effect only.
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 + BMI2 variant of string_inflate.
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                             regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
// Shares the encode_iso_array stub with the ASCII rule below; the final
// boolean argument (false here, true below) selects the ASCII variant.
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ASCII
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                            regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                            eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
// Signed int compare, register-register: CMP r32, r/m32 (0x3B /r).
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Signed int compare against an immediate; OpcSErm/Con8or32 pick the
// sign-extended 8-bit form when the constant fits.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST r,r (0x85) — shorter than CMP r,0.
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & con) == 0 test folded into TEST r, imm32 (0xF7 /0).
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & mem) == 0 test folded into TEST r, m32.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Pointer vs pointer-constant compare; SetInstMark/ClearInstMark bracket the
// encoding because the immediate may carry relocation info.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( SetInstMark, OpcSErm( op1, op2 ), Con8or32( op2 ), ClearInstMark );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// Pointer null-check via TEST r,r; produces signed flags (see note above).
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF), ClearInstMark );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
// Encoded via the min_enc helper; opcode(0xCC) is a placeholder, not emitted.
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
// Uses 64-bit arithmetic split across EAX:EDX (limit:limit_hi) plus a second
// register pair (init:tmp) to compute
//   limit = init + stride * ((limit - init + stride - 1) / stride).
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    int m1 = (strd > 0) ? 1 : -1;
    // NOTE(review): m1 is computed but never used below — presumably leftover
    // from an earlier version of this encoding; harmless.
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
// Indirect jump through a table stored in the constant section, indexed by
// switch_val.
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index), noreg);
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false);  // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false);  // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false);  // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false);  // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false);  // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Conditional jump on CF/ZF flavors that must also consult the parity flag
// (unordered FP compare results): ne takes the branch on parity as well,
// eq must skip it on parity.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
            if ($cop$$cmpcode == Assembler::notEqual) {
              $$emit$$"JP,u $labl\n\t"
              $$emit$$"J$cop,u $labl"
            } else {
              $$emit$$"JP,u done\n\t"
              $$emit$$"J$cop,u $labl\n\t"
              $$emit$$"done:"
            }
          %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
// array for an instance of the superklass.  Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()).  Return
// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1);  // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant matched when the PartialSubtypeCheck result is only compared
// against null: the flags are the real output, EDI is just clobbered,
// so the XOR of EDI can be skipped (opcode 0x0).
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx,
                                      eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0);  // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset.  These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching.  Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.
// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);  // 2-byte rel8 jump
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Unsigned-compare variant for flags that may carry an unordered result.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// EQ/NE with unordered handling: emits a parity check plus the conditional
// branch, both in short form (hence size(4)).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // NE: unordered (parity) also branches.
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // EQ: unordered must fall through, so skip the equal-branch.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers.  Comparing such values efficiently
// is tricky.  The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test.  The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code.  The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf.  One fix is to not match both
// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // Signed compare of the high words decides unless they are equal;
    // then an unsigned compare of the low words breaks the tie.
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
12842 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{ 12843 match( Set flags (CmpL src zero )); 12844 ins_cost(100); 12845 format %{ "TEST $src.hi,$src.hi" %} 12846 opcode(0x85); 12847 ins_encode( OpcP, RegReg_Hi2( src, src ) ); 12848 ins_pipe( ialu_cr_reg_reg ); 12849 %} 12850 12851 // Manifest a CmpL result in the normal flags. Only good for LT or GE 12852 // compares. Can be used for LE or GT compares by reversing arguments. 12853 // NOT GOOD FOR EQ/NE tests. 12854 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{ 12855 match( Set flags (CmpL src1 src2 )); 12856 effect( TEMP tmp ); 12857 ins_cost(300); 12858 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" 12859 "MOV $tmp,$src1.hi\n\t" 12860 "SBB $tmp,$src2.hi\t! Compute flags for long compare" %} 12861 ins_encode( long_cmp_flags2( src1, src2, tmp ) ); 12862 ins_pipe( ialu_cr_reg_reg ); 12863 %} 12864 12865 // Long compares reg < zero/req OR reg >= zero/req. 12866 // Just a wrapper for a normal branch, plus the predicate test. 12867 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{ 12868 match(If cmp flags); 12869 effect(USE labl); 12870 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12871 expand %{ 12872 jmpCon(cmp,flags,labl); // JLT or JGE... 12873 %} 12874 %} 12875 12876 //====== 12877 // Manifest a CmpUL result in the normal flags. Only good for LT or GE 12878 // compares. Can be used for LE or GT compares by reversing arguments. 12879 // NOT GOOD FOR EQ/NE tests. 
12880 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{ 12881 match(Set flags (CmpUL src zero)); 12882 ins_cost(100); 12883 format %{ "TEST $src.hi,$src.hi" %} 12884 opcode(0x85); 12885 ins_encode(OpcP, RegReg_Hi2(src, src)); 12886 ins_pipe(ialu_cr_reg_reg); 12887 %} 12888 12889 // Manifest a CmpUL result in the normal flags. Only good for LT or GE 12890 // compares. Can be used for LE or GT compares by reversing arguments. 12891 // NOT GOOD FOR EQ/NE tests. 12892 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{ 12893 match(Set flags (CmpUL src1 src2)); 12894 effect(TEMP tmp); 12895 ins_cost(300); 12896 format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t" 12897 "MOV $tmp,$src1.hi\n\t" 12898 "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %} 12899 ins_encode(long_cmp_flags2(src1, src2, tmp)); 12900 ins_pipe(ialu_cr_reg_reg); 12901 %} 12902 12903 // Unsigned long compares reg < zero/req OR reg >= zero/req. 12904 // Just a wrapper for a normal branch, plus the predicate test. 12905 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{ 12906 match(If cmp flags); 12907 effect(USE labl); 12908 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); 12909 expand %{ 12910 jmpCon(cmp, flags, labl); // JLT or JGE... 12911 %} 12912 %} 12913 12914 // Compare 2 longs and CMOVE longs. 
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  // Long CMOVE is two 32-bit CMOVcc ops, one per register half.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags variants simply expand into the signed-operand encodings.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LTGE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 stack registers, UseSSE<=1).
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM registers, UseSSE>=2).
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  // A long is zero iff the OR of its halves is zero.
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
13114 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{ 13115 match(If cmp flags); 13116 effect(USE labl); 13117 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne); 13118 expand %{ 13119 jmpCon(cmp, flags, labl); // JEQ or JNE... 13120 %} 13121 %} 13122 13123 // Compare 2 longs and CMOVE longs. 13124 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{ 13125 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 13126 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13127 ins_cost(400); 13128 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13129 "CMOV$cmp $dst.hi,$src.hi" %} 13130 opcode(0x0F,0x40); 13131 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 13132 ins_pipe( pipe_cmov_reg_long ); 13133 %} 13134 13135 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{ 13136 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 13137 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13138 ins_cost(500); 13139 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13140 "CMOV$cmp $dst.hi,$src.hi" %} 13141 opcode(0x0F,0x40); 13142 ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark ); 13143 ins_pipe( pipe_cmov_reg_long ); 13144 %} 13145 13146 // Compare 2 longs and CMOVE ints. 
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags variants expand into the signed-operand encodings.
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_EQNE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_EQNE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_EQNE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 stack registers, UseSSE<=1).
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM registers, UseSSE>=2).
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags variants expand into the signed-operand encodings.
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() &&
            ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
              _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
// Compare 2 longs and CMOVE a 32-bit int on the resulting condition.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);   // CMOVcc opcode base; condition supplied by enc_cmov
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same, with the int source in memory.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags variants expand into the signed-flags rules above
// (identical code generation).
instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LEGT(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 FPU stack form, UseSSE<=1).
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM form, UseSSE>=2).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 FPU stack form, UseSSE==0).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM form, UseSSE>=1).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
13488 instruct CallDynamicJavaDirect(method meth) %{ 13489 match(CallDynamicJava); 13490 effect(USE meth); 13491 13492 ins_cost(300); 13493 format %{ "MOV EAX,(oop)-1\n\t" 13494 "CALL,dynamic" %} 13495 opcode(0xE8); /* E8 cd */ 13496 ins_encode( pre_call_resets, 13497 Java_Dynamic_Call( meth ), 13498 call_epilog, 13499 post_call_FPU ); 13500 ins_pipe( pipe_slow ); 13501 ins_alignment(4); 13502 %} 13503 13504 // Call Runtime Instruction 13505 instruct CallRuntimeDirect(method meth) %{ 13506 match(CallRuntime ); 13507 effect(USE meth); 13508 13509 ins_cost(300); 13510 format %{ "CALL,runtime " %} 13511 opcode(0xE8); /* E8 cd */ 13512 // Use FFREEs to clear entries in float stack 13513 ins_encode( pre_call_resets, 13514 FFree_Float_Stack_All, 13515 Java_To_Runtime( meth ), 13516 post_call_FPU ); 13517 ins_pipe( pipe_slow ); 13518 %} 13519 13520 // Call runtime without safepoint 13521 instruct CallLeafDirect(method meth) %{ 13522 match(CallLeaf); 13523 effect(USE meth); 13524 13525 ins_cost(300); 13526 format %{ "CALL_LEAF,runtime " %} 13527 opcode(0xE8); /* E8 cd */ 13528 ins_encode( pre_call_resets, 13529 FFree_Float_Stack_All, 13530 Java_To_Runtime( meth ), 13531 Verify_FPU_For_Leaf, post_call_FPU ); 13532 ins_pipe( pipe_slow ); 13533 %} 13534 13535 instruct CallLeafNoFPDirect(method meth) %{ 13536 match(CallLeafNoFP); 13537 effect(USE meth); 13538 13539 ins_cost(300); 13540 format %{ "CALL_LEAF_NOFP,runtime " %} 13541 opcode(0xE8); /* E8 cd */ 13542 ins_encode(pre_call_resets, Java_To_Runtime(meth)); 13543 ins_pipe( pipe_slow ); 13544 %} 13545 13546 13547 // Return Instruction 13548 // Remove the return address & jump to it. 13549 instruct Ret() %{ 13550 match(Return); 13551 format %{ "RET" %} 13552 opcode(0xC3); 13553 ins_encode(OpcP); 13554 ins_pipe( pipe_jmp ); 13555 %} 13556 13557 // Tail Call; Jump from runtime stub to Java code. 13558 // Also known as an 'interprocedural jump'. 13559 // Target of jump will eventually return to caller. 
13560 // TailJump below removes the return address. 13561 // Don't use ebp for 'jump_target' because a MachEpilogNode has already been 13562 // emitted just above the TailCall which has reset ebp to the caller state. 13563 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{ 13564 match(TailCall jump_target method_ptr); 13565 ins_cost(300); 13566 format %{ "JMP $jump_target \t# EBX holds method" %} 13567 opcode(0xFF, 0x4); /* Opcode FF /4 */ 13568 ins_encode( OpcP, RegOpc(jump_target) ); 13569 ins_pipe( pipe_jmp ); 13570 %} 13571 13572 13573 // Tail Jump; remove the return address; jump to target. 13574 // TailCall above leaves the return address around. 13575 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{ 13576 match( TailJump jump_target ex_oop ); 13577 ins_cost(300); 13578 format %{ "POP EDX\t# pop return address into dummy\n\t" 13579 "JMP $jump_target " %} 13580 opcode(0xFF, 0x4); /* Opcode FF /4 */ 13581 ins_encode( enc_pop_rdx, 13582 OpcP, RegOpc(jump_target) ); 13583 ins_pipe( pipe_jmp ); 13584 %} 13585 13586 // Forward exception. 13587 instruct ForwardExceptionjmp() 13588 %{ 13589 match(ForwardException); 13590 13591 format %{ "JMP forward_exception_stub" %} 13592 ins_encode %{ 13593 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg); 13594 %} 13595 ins_pipe(pipe_jmp); 13596 %} 13597 13598 // Create exception oop: created by stack-crawling runtime code. 13599 // Created exception is now available to this handler, and is setup 13600 // just prior to jumping to this handler. No code emitted. 13601 instruct CreateException( eAXRegP ex_oop ) 13602 %{ 13603 match(Set ex_oop (CreateEx)); 13604 13605 size(0); 13606 // use the following format syntax 13607 format %{ "# exception oop is in EAX; no code emitted" %} 13608 ins_encode(); 13609 ins_pipe( empty ); 13610 %} 13611 13612 13613 // Rethrow exception: 13614 // The exception oop will come in the first argument position. 
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP    rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Legacy (non-lightweight) fast-path monitor enter.  Sets the flags
// register for the following branch; box/tmp/scr are clobbered.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    // On 32-bit x86 the current thread is loaded explicitly (no
    // dedicated thread register).
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, $thread$$Register, nullptr);
  %}
  ins_pipe(pipe_slow);
%}

// Legacy fast-path monitor exit.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Lightweight-locking fast-path monitor enter (LockingMode == LM_LIGHTWEIGHT).
instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Lightweight-locking fast-path monitor exit.
instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object eax_reg));
  effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast a GP register into an AVX-512 opmask register, vector
// length <= 32.
// NOTE(review): the format string says "mask_all_evexL_LE32" while the
// instruct is named ..._LT32 and the predicate tests <= 32 — the format
// text is debug-only output but the naming is inconsistent; confirm
// against upstream before renaming.
instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// Same, vector length > 32: needs a temporary opmask register.
instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// Int-register source variant, vector length > 32.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Safepoint Instruction
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ set_inst_mark();
    __ relocate(relocInfo::poll_type);
    __ clear_inst_mark();
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();
    // 0x85 is the TEST r/m32,r32 opcode — verify the assembler emitted
    // the 2-byte form this instruct's size(2) promises.
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV    $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Collapse a store followed by a reload of the same stack slot into
// just the store (the register still holds the value).
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.