//
// Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes
// (i.e. the hardware register number used in ModRM/SIB encoding).

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI).
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
reg_class ebpd_reg( EBP,EDI );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define   RELOC_IMM32    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

#define __ masm->

// How to find the high register of a Long pair, given the low register
// (register pairs are laid out two encodings apart, see reg_def FPRnL/FPRnH above).
#define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
#define   HIGH_FROM_LOW_ENC(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Nothing to initialize here; register masks are fully static on x86_32.
void reg_mask_init() {}

// Note: 'double' and 'long long' have 32-bits alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool pointer is 16-byte aligned inside fp_signmask_pool (see double_quadword).
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call to reset FP/vector state:
// 6 bytes for FLDCW when the method runs in 24-bit FPU mode, 3 bytes for
// VZEROUPPER when the CPU supports it.  Used for return-address offsets and
// call-alignment padding below.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}

// Size in bytes of the FFree_Float_Stack_All stub code; set when the stub is
// emitted (asserted below), -1 until then.
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModRM byte: mod (f1) in bits 7-6, reg/opcode (f2) in bits 5-3, r/m (f3) in bits 2-0.
void emit_rm(C2_MacroAssembler *masm, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  __ emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with a condition code OR'd into its low bits.
void emit_cc(C2_MacroAssembler *masm, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  __ emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(C2_MacroAssembler *masm, int code) {
  __ emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(C2_MacroAssembler *masm, int code, relocInfo::relocType reloc, int offset = 0) {
  __ relocate(__ inst_mark() + offset, reloc);
  emit_opcode(masm, code);
}

// EMIT_D8()
void emit_d8(C2_MacroAssembler *masm, int d8) {
  __ emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(C2_MacroAssembler *masm, int d16) {
  __ emit_int16(d16);
}

// EMIT_D32()
void emit_d32(C2_MacroAssembler *masm, int d32) {
  __ emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(C2_MacroAssembler *masm, int d32, relocInfo::relocType reloc,
        int format) {
  __ relocate(__ inst_mark(), reloc, format);
  __ emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(C2_MacroAssembler *masm, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Sanity-check that an embedded oop is a real oop (non_oop_word is the
  // placeholder patched into IC call sites, so it is exempt).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  __ relocate(__ inst_mark(), rspec, format);
  __ emit_int32(d32);
}

// Access stack slot for load or store
// Emits 'opcode' followed by an [ESP+disp] memory operand; an 8-bit
// displacement is used when disp fits in a signed byte, else 32-bit.
void store_to_stackslot(C2_MacroAssembler *masm, int opcode, int rm_field, int disp) {
  emit_opcode( masm, opcode );               // (e.g., FILD   [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( masm, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (masm, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( masm, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(masm, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem)  %{    // emit_reg_mem
// Emit the ModRM (and, when required, SIB and displacement) bytes for a
// register/memory operand.  'index == 0x4' means no index register;
// 'base == -1' is a special flag for an absolute 32-bit address.
// A relocation is attached to 32-bit displacements when disp_reloc != none.
void encode_RegMem( C2_MacroAssembler *masm, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(masm, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(masm, 0x1, reg_encoding, base);
        emit_d8(masm, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(masm, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(masm, displace, disp_reloc, 1);
          } else {
            emit_d32      (masm, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(masm, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(masm, displace, disp_reloc, 1);
          } else {
            emit_d32      (masm, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(masm, 0x0, reg_encoding, 0x4);
      emit_rm(masm, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(masm, 0x1, reg_encoding, 0x4);
        emit_rm(masm, scale, index, base);
        emit_d8(masm, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(masm, 0x2, reg_encoding, 0x4);
          emit_rm(masm, scale, index, 0x04);
        } else {
          emit_rm(masm, 0x2, reg_encoding, 0x4);
          emit_rm(masm, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(masm, displace, disp_reloc, 1);
        } else {
          emit_d32      (masm, displace);
        }
      }
    }
  }
}


// Emit a register-to-register MOV (opcode 0x8B); a self-move emits nothing.
void encode_Copy( C2_MacroAssembler *masm, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, dst_encoding, src_encoding );
  }
}

void emit_cmpfp_fixup(MacroAssembler* masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst' after a comiss/ucomiss:
// -1 for 'below' or unordered (NaN sets parity), 0 for equal, 1 for greater.
static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog for -XX:+PrintOptoAssembly; must mirror the code
// generated by MachPrologNode::emit / verified_entry below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH   EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB    ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB    ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD    EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW  \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog; must mirror MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW  standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL   EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL   rsp, poll_offset[thread]  \n\t"
              "JA     #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(masm, 0x81); // add  SP, #framesize
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d32(masm, framesize);
  } else if (framesize) {
    emit_opcode(masm, 0x83); // add  SP, #framesize
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, framesize);
  }

  emit_opcode(masm, 0x58 | EBP_enc); // POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Only create the real stub on the final emission pass, not while
      // merely measuring code size.
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ set_inst_mark();
    __ relocate(relocInfo::poll_return_type);
    __ clear_inst_mark();
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//============================================================================= 733 734 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack }; 735 static enum RC rc_class( OptoReg::Name reg ) { 736 737 if( !OptoReg::is_valid(reg) ) return rc_bad; 738 if (OptoReg::is_stack(reg)) return rc_stack; 739 740 VMReg r = OptoReg::as_VMReg(reg); 741 if (r->is_Register()) return rc_int; 742 if (r->is_FloatRegister()) { 743 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 744 return rc_float; 745 } 746 if (r->is_KRegister()) return rc_kreg; 747 assert(r->is_XMMRegister(), "must be"); 748 return rc_xmm; 749 } 750 751 static int impl_helper( C2_MacroAssembler *masm, bool do_size, bool is_load, int offset, int reg, 752 int opcode, const char *op_str, int size, outputStream* st ) { 753 if( masm ) { 754 masm->set_inst_mark(); 755 emit_opcode (masm, opcode ); 756 encode_RegMem(masm, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); 757 masm->clear_inst_mark(); 758 #ifndef PRODUCT 759 } else if( !do_size ) { 760 if( size != 0 ) st->print("\n\t"); 761 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 762 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 763 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 764 } else { // FLD, FST, PUSH, POP 765 st->print("%s [ESP + #%d]",op_str,offset); 766 } 767 #endif 768 } 769 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 770 return size+3+offset_size; 771 } 772 773 // Helper for XMM registers. Extra opcode bits, limited syntax. 
// Spill/reload an XMM register to/from a stack slot [ESP+offset] using
// movflt (single) or movdbl (double, recognized by reg_lo+1 == reg_hi).
// Returns the accumulated encoded size; with AVX-512 the displacement may
// use the EVEX compressed disp8 form, which the sizing below accounts for.
static int impl_x_helper( C2_MacroAssembler *masm, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (masm) {
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    __ set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Size accounting.  With AVX-512 the compressed disp8*N form may shrink
  // a large offset to one byte; query the assembler to find out.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy (movflt/movdbl); double moves are recognized by
// adjacent register pairs.  Returns the accumulated encoded size.
static int impl_movx_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (masm) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Copy a 32-bit GPR into an XMM register (movdl).  Returns total size.
static int impl_movgpr2x_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (masm) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// Copy the low 32 bits of an XMM register into a GPR (movdl).  Returns total size.
static int impl_movx2gpr_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (masm) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer register-to-register copy: MOV r32,r/m32 (0x8B /r), 2 bytes.
static int impl_mov_helper( C2_MacroAssembler *masm, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( masm ) {
    emit_opcode(masm, 0x8B );
    emit_rm    (masm, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 register to a stack slot.  If the source is not already at
// the top of the FP stack it is FLD'ed first and stored with FSTP (store &
// pop); otherwise a plain FST leaves the stack untouched.  The ModRM reg
// field (/3 for FSTP, /2 for FST) is supplied through EBX_num/EDX_num,
// whose encodings are 3 and 2.
static int impl_fp_store_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( masm ) {
      emit_opcode( masm, 0xD9 ); // FLD (i.e., push it)
      emit_d8( masm, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(masm,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
947 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, 948 int src_hi, int dst_hi, uint ireg, outputStream* st); 949 950 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, 951 int stack_offset, int reg, uint ireg, outputStream* st); 952 953 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset, 954 int dst_offset, uint ireg, outputStream* st) { 955 if (masm) { 956 switch (ireg) { 957 case Op_VecS: 958 __ pushl(Address(rsp, src_offset)); 959 __ popl (Address(rsp, dst_offset)); 960 break; 961 case Op_VecD: 962 __ pushl(Address(rsp, src_offset)); 963 __ popl (Address(rsp, dst_offset)); 964 __ pushl(Address(rsp, src_offset+4)); 965 __ popl (Address(rsp, dst_offset+4)); 966 break; 967 case Op_VecX: 968 __ movdqu(Address(rsp, -16), xmm0); 969 __ movdqu(xmm0, Address(rsp, src_offset)); 970 __ movdqu(Address(rsp, dst_offset), xmm0); 971 __ movdqu(xmm0, Address(rsp, -16)); 972 break; 973 case Op_VecY: 974 __ vmovdqu(Address(rsp, -32), xmm0); 975 __ vmovdqu(xmm0, Address(rsp, src_offset)); 976 __ vmovdqu(Address(rsp, dst_offset), xmm0); 977 __ vmovdqu(xmm0, Address(rsp, -32)); 978 break; 979 case Op_VecZ: 980 __ evmovdquq(Address(rsp, -64), xmm0, 2); 981 __ evmovdquq(xmm0, Address(rsp, src_offset), 2); 982 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); 983 __ evmovdquq(xmm0, Address(rsp, -64), 2); 984 break; 985 default: 986 ShouldNotReachHere(); 987 } 988 #ifndef PRODUCT 989 } else { 990 switch (ireg) { 991 case Op_VecS: 992 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 993 "popl [rsp + #%d]", 994 src_offset, dst_offset); 995 break; 996 case Op_VecD: 997 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 998 "popq [rsp + #%d]\n\t" 999 "pushl [rsp + #%d]\n\t" 1000 "popq [rsp + #%d]", 1001 src_offset, dst_offset, src_offset+4, dst_offset+4); 1002 break; 1003 case Op_VecX: 1004 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1005 "movdqu xmm0, [rsp + #%d]\n\t" 1006 "movdqu [rsp + 
#%d], xmm0\n\t" 1007 "movdqu xmm0, [rsp - #16]", 1008 src_offset, dst_offset); 1009 break; 1010 case Op_VecY: 1011 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1012 "vmovdqu xmm0, [rsp + #%d]\n\t" 1013 "vmovdqu [rsp + #%d], xmm0\n\t" 1014 "vmovdqu xmm0, [rsp - #32]", 1015 src_offset, dst_offset); 1016 break; 1017 case Op_VecZ: 1018 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1019 "vmovdqu xmm0, [rsp + #%d]\n\t" 1020 "vmovdqu [rsp + #%d], xmm0\n\t" 1021 "vmovdqu xmm0, [rsp - #64]", 1022 src_offset, dst_offset); 1023 break; 1024 default: 1025 ShouldNotReachHere(); 1026 } 1027 #endif 1028 } 1029 } 1030 1031 uint MachSpillCopyNode::implementation( C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1032 // Get registers to move 1033 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1034 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1035 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1036 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1037 1038 enum RC src_second_rc = rc_class(src_second); 1039 enum RC src_first_rc = rc_class(src_first); 1040 enum RC dst_second_rc = rc_class(dst_second); 1041 enum RC dst_first_rc = rc_class(dst_first); 1042 1043 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1044 1045 // Generate spill code! 
1046 int size = 0; 1047 1048 if( src_first == dst_first && src_second == dst_second ) 1049 return size; // Self copy, no move 1050 1051 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) { 1052 uint ireg = ideal_reg(); 1053 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1054 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1055 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1056 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1057 // mem -> mem 1058 int src_offset = ra_->reg2offset(src_first); 1059 int dst_offset = ra_->reg2offset(dst_first); 1060 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st); 1061 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1062 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st); 1063 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1064 int stack_offset = ra_->reg2offset(dst_first); 1065 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st); 1066 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1067 int stack_offset = ra_->reg2offset(src_first); 1068 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st); 1069 } else { 1070 ShouldNotReachHere(); 1071 } 1072 return 0; 1073 } 1074 1075 // -------------------------------------- 1076 // Check for mem-mem move. push/pop to move. 
1077 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1078 if( src_second == dst_first ) { // overlapping stack copy ranges 1079 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1080 size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1081 size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1082 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1083 } 1084 // move low bits 1085 size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1086 size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1087 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1088 size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1089 size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1090 } 1091 return size; 1092 } 1093 1094 // -------------------------------------- 1095 // Check for integer reg-reg copy 1096 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1097 size = impl_mov_helper(masm,do_size,src_first,dst_first,size, st); 1098 1099 // Check for integer store 1100 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1101 size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1102 1103 // Check for integer load 1104 if( src_first_rc == rc_stack && dst_first_rc == rc_int ) 1105 size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1106 1107 // Check for integer reg-xmm reg copy 1108 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1109 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1110 "no 64 bit integer-float reg moves" ); 1111 return 
impl_movgpr2x_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st); 1112 } 1113 // -------------------------------------- 1114 // Check for float reg-reg copy 1115 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1116 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1117 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1118 if( masm ) { 1119 1120 // Note the mucking with the register encode to compensate for the 0/1 1121 // indexing issue mentioned in a comment in the reg_def sections 1122 // for FPR registers many lines above here. 1123 1124 if( src_first != FPR1L_num ) { 1125 emit_opcode (masm, 0xD9 ); // FLD ST(i) 1126 emit_d8 (masm, 0xC0+Matcher::_regEncode[src_first]-1 ); 1127 emit_opcode (masm, 0xDD ); // FSTP ST(i) 1128 emit_d8 (masm, 0xD8+Matcher::_regEncode[dst_first] ); 1129 } else { 1130 emit_opcode (masm, 0xDD ); // FST ST(i) 1131 emit_d8 (masm, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1132 } 1133 #ifndef PRODUCT 1134 } else if( !do_size ) { 1135 if( size != 0 ) st->print("\n\t"); 1136 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1137 else st->print( "FST %s", Matcher::regName[dst_first]); 1138 #endif 1139 } 1140 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1141 } 1142 1143 // Check for float store 1144 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1145 return impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1146 } 1147 1148 // Check for float load 1149 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1150 int offset = ra_->reg2offset(src_first); 1151 const char *op_str; 1152 int op; 1153 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1154 op_str = "FLD_D"; 1155 op = 0xDD; 1156 } else { // 32-bit load 1157 op_str = "FLD_S"; 1158 op = 0xD9; 1159 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1160 } 1161 if( masm ) { 1162 masm->set_inst_mark(); 1163 emit_opcode (masm, op ); 1164 encode_RegMem(masm, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1165 emit_opcode (masm, 0xDD ); // FSTP ST(i) 1166 emit_d8 (masm, 0xD8+Matcher::_regEncode[dst_first] ); 1167 masm->clear_inst_mark(); 1168 #ifndef PRODUCT 1169 } else if( !do_size ) { 1170 if( size != 0 ) st->print("\n\t"); 1171 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1172 #endif 1173 } 1174 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1175 return size + 3+offset_size+2; 1176 } 1177 1178 // Check for xmm reg-reg copy 1179 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1180 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1181 (src_first+1 == src_second && dst_first+1 == dst_second), 1182 "no non-adjacent float-moves" ); 1183 return impl_movx_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st); 1184 } 1185 1186 // Check for xmm reg-integer reg copy 1187 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1188 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1189 "no 64 bit float-integer reg moves" ); 1190 return impl_movx2gpr_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st); 1191 } 1192 1193 // Check for xmm store 1194 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1195 return impl_x_helper(masm,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st); 1196 } 1197 1198 // Check for float xmm load 1199 if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1200 return impl_x_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1201 } 1202 1203 // Copy from float reg to xmm reg 1204 if( 
src_first_rc == rc_float && dst_first_rc == rc_xmm ) { 1205 // copy to the top of stack from floating point reg 1206 // and use LEA to preserve flags 1207 if( masm ) { 1208 emit_opcode(masm,0x8D); // LEA ESP,[ESP-8] 1209 emit_rm(masm, 0x1, ESP_enc, 0x04); 1210 emit_rm(masm, 0x0, 0x04, ESP_enc); 1211 emit_d8(masm,0xF8); 1212 #ifndef PRODUCT 1213 } else if( !do_size ) { 1214 if( size != 0 ) st->print("\n\t"); 1215 st->print("LEA ESP,[ESP-8]"); 1216 #endif 1217 } 1218 size += 4; 1219 1220 size = impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1221 1222 // Copy from the temp memory to the xmm reg. 1223 size = impl_x_helper(masm,do_size,true ,0,dst_first, dst_second, size, st); 1224 1225 if( masm ) { 1226 emit_opcode(masm,0x8D); // LEA ESP,[ESP+8] 1227 emit_rm(masm, 0x1, ESP_enc, 0x04); 1228 emit_rm(masm, 0x0, 0x04, ESP_enc); 1229 emit_d8(masm,0x08); 1230 #ifndef PRODUCT 1231 } else if( !do_size ) { 1232 if( size != 0 ) st->print("\n\t"); 1233 st->print("LEA ESP,[ESP+8]"); 1234 #endif 1235 } 1236 size += 4; 1237 return size; 1238 } 1239 1240 // AVX-512 opmask specific spilling. 
1241 if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) { 1242 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1243 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1244 int offset = ra_->reg2offset(src_first); 1245 if (masm != nullptr) { 1246 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); 1247 #ifndef PRODUCT 1248 } else { 1249 st->print("KMOV %s, [ESP + %d]", Matcher::regName[dst_first], offset); 1250 #endif 1251 } 1252 return 0; 1253 } 1254 1255 if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) { 1256 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1257 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1258 int offset = ra_->reg2offset(dst_first); 1259 if (masm != nullptr) { 1260 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first])); 1261 #ifndef PRODUCT 1262 } else { 1263 st->print("KMOV [ESP + %d], %s", offset, Matcher::regName[src_first]); 1264 #endif 1265 } 1266 return 0; 1267 } 1268 1269 if (src_first_rc == rc_kreg && dst_first_rc == rc_int) { 1270 Unimplemented(); 1271 return 0; 1272 } 1273 1274 if (src_first_rc == rc_int && dst_first_rc == rc_kreg) { 1275 Unimplemented(); 1276 return 0; 1277 } 1278 1279 if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) { 1280 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1281 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1282 if (masm != nullptr) { 1283 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first])); 1284 #ifndef PRODUCT 1285 } else { 1286 st->print("KMOV %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]); 1287 #endif 1288 } 1289 return 0; 1290 } 1291 1292 assert( size > 0, "missed a case" ); 1293 1294 // 
-------------------------------------------------------------------- 1295 // Check for second bits still needing moving. 1296 if( src_second == dst_second ) 1297 return size; // Self copy; no move 1298 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1299 1300 // Check for second word int-int move 1301 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1302 return impl_mov_helper(masm,do_size,src_second,dst_second,size, st); 1303 1304 // Check for second word integer store 1305 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1306 return impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1307 1308 // Check for second word integer load 1309 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1310 return impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1311 1312 Unimplemented(); 1313 return 0; // Mute compiler 1314 } 1315 1316 #ifndef PRODUCT 1317 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1318 implementation( nullptr, ra_, false, st ); 1319 } 1320 #endif 1321 1322 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { 1323 implementation( masm, ra_, false, nullptr ); 1324 } 1325 1326 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1327 return MachNode::size(ra_); 1328 } 1329 1330 1331 //============================================================================= 1332 #ifndef PRODUCT 1333 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1334 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1335 int reg = ra_->get_reg_first(this); 1336 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1337 } 1338 #endif 1339 1340 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { 1341 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1342 int reg = 
ra_->get_encode(this); 1343 if( offset >= 128 ) { 1344 emit_opcode(masm, 0x8D); // LEA reg,[SP+offset] 1345 emit_rm(masm, 0x2, reg, 0x04); 1346 emit_rm(masm, 0x0, 0x04, ESP_enc); 1347 emit_d32(masm, offset); 1348 } 1349 else { 1350 emit_opcode(masm, 0x8D); // LEA reg,[SP+offset] 1351 emit_rm(masm, 0x1, reg, 0x04); 1352 emit_rm(masm, 0x0, 0x04, ESP_enc); 1353 emit_d8(masm, offset); 1354 } 1355 } 1356 1357 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1358 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1359 if( offset >= 128 ) { 1360 return 7; 1361 } 1362 else { 1363 return 4; 1364 } 1365 } 1366 1367 //============================================================================= 1368 #ifndef PRODUCT 1369 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1370 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1371 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1372 st->print_cr("\tNOP"); 1373 st->print_cr("\tNOP"); 1374 if( !OptoBreakpoint ) 1375 st->print_cr("\tNOP"); 1376 } 1377 #endif 1378 1379 void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { 1380 __ ic_check(CodeEntryAlignment); 1381 } 1382 1383 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1384 return MachNode::size(ra_); // too many variables; just compute it 1385 // the hard way 1386 } 1387 1388 1389 //============================================================================= 1390 1391 // Vector calling convention not supported. 1392 bool Matcher::supports_vector_calling_convention() { 1393 return false; 1394 } 1395 1396 OptoRegPair Matcher::vector_return_value(uint ideal_reg) { 1397 Unimplemented(); 1398 return OptoRegPair(0, 0); 1399 } 1400 1401 // Is this branch offset short enough that a short branch can be used? 1402 // 1403 // NOTE: If the platform does not provide any short branch variants, then 1404 // this method should return false for offset 0. 
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

// A register is spillable as an argument exactly when it can carry one.
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

// Integer register pressure limit for the allocator; -1 means "use default".
uint Matcher::int_pressure_limit()
{
  return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
}

// Float register pressure limit for the allocator; -1 means "use default".
uint Matcher::float_pressure_limit()
{
  return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL (no divmodL on 32-bit x86)
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL (no divmodL on 32-bit x86)
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// No SP-save register is needed around method-handle invokes on this platform.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// Recognizes AndL with a constant mask whose high word is zero, and
// ConL constants whose high word is zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.  MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Set instruction mark in MacroAssembler. This is used only in
  // instructions that emit bytes directly to the CodeBuffer wrapped
  // in the MacroAssembler. Should go away once all "instruct" are
  // patched to emit bytes only using methods in MacroAssembler.
// Mark the start of the current instruction in the CodeBuffer, so that
// relocation info can be attached to raw-byte encodings; paired with
// ClearInstMark below.
enc_class SetInstMark %{
  __ set_inst_mark();
%}

enc_class ClearInstMark %{
  __ clear_inst_mark();
%}

// Emit primary opcode
enc_class OpcP %{
  emit_opcode(masm, $primary);
%}

// Emit secondary opcode
enc_class OpcS %{
  emit_opcode(masm, $secondary);
%}

// Emit opcode directly
enc_class Opcode(immI d8) %{
  emit_opcode(masm, $d8$$constant);
%}

// Operand-size override prefix (0x66): makes the following instruction 16-bit.
enc_class SizePrefix %{
  emit_opcode(masm,0x66);
%}

// Mod=3 (register-direct) ModR/M byte for a reg,reg form.
enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
  emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
%}

enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
  emit_opcode(masm,$opcode$$constant);
  emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
%}

enc_class mov_r32_imm0( rRegI dst ) %{
  emit_opcode( masm, 0xB8 + $dst$$reg ); // 0xB8+ rd  -- MOV r32,imm32
  emit_d32   ( masm, 0x0  );             // imm32==0x0
%}

enc_class cdq_enc %{
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  //
  //         normal case                          special case
  //
  // input : rax,: dividend                       min_int
  //         reg:  divisor                        -1
  //
  // output: rax,: quotient  (= rax, idiv reg)    min_int
  //         rdx:  remainder (= rax, irem reg)    0
  //
  // Code sequence:
  //
  // 81 F8 00 00 00 80    cmp         rax,80000000h
  // 0F 85 0B 00 00 00    jne         normal_case
  // 33 D2                xor         rdx,edx
  // 83 F9 FF             cmp         rcx,0FFh
  // 0F 84 03 00 00 00    je          done
  //                  normal_case:
  // 99                   cdq
  // F7 F9                idiv        rax,ecx
  //                  done:
  //
  emit_opcode(masm,0x81); emit_d8(masm,0xF8);
  emit_opcode(masm,0x00); emit_d8(masm,0x00);
  emit_opcode(masm,0x00); emit_d8(masm,0x80);   // cmp rax,80000000h
  emit_opcode(masm,0x0F); emit_d8(masm,0x85);
  emit_opcode(masm,0x0B); emit_d8(masm,0x00);
  emit_opcode(masm,0x00); emit_d8(masm,0x00);   // jne normal_case
  emit_opcode(masm,0x33); emit_d8(masm,0xD2);   // xor rdx,edx
  emit_opcode(masm,0x83); emit_d8(masm,0xF9); emit_d8(masm,0xFF);   // cmp rcx,0FFh
  emit_opcode(masm,0x0F); emit_d8(masm,0x84);
  emit_opcode(masm,0x03); emit_d8(masm,0x00);
  emit_opcode(masm,0x00); emit_d8(masm,0x00);   // je done
  // normal_case:
  emit_opcode(masm,0x99);                       // cdq
  // idiv (note: must be emitted by the user of this rule)
  // normal:
%}

// Dense encoding for older common ops
enc_class Opc_plus(immI opcode, rRegI reg) %{
  emit_opcode(masm, $opcode$$constant + $reg$$reg);
%}


// Opcode enc_class for 8/32 bit immediate instructions with sign-extension
enc_class OpcSE (immI imm) %{    // Emit primary opcode and set sign-extend bit
  // Check for 8-bit immediate, and set sign extend bit in opcode
  if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
    emit_opcode(masm, $primary | 0x02);
  }
  else {    // If 32-bit immediate
    emit_opcode(masm, $primary);
  }
%}

enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
  // Emit primary opcode and set sign-extend bit
  // Check for 8-bit immediate, and set sign extend bit in opcode
  if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
    emit_opcode(masm, $primary | 0x02);
  }
  else {    // If 32-bit immediate
    emit_opcode(masm, $primary);
  }
  // Emit r/m byte with secondary opcode, after primary opcode.
  emit_rm(masm, 0x3, $secondary, $dst$$reg);
%}

enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
  // Check for 8-bit immediate, and set sign extend bit in opcode
  if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
    $$$emit8$imm$$constant;
  }
  else {    // If 32-bit immediate
    // Output immediate
    $$$emit32$imm$$constant;
  }
%}

// Low word of a long-immediate op: primary opcode (with sign-extend bit when
// the low 32 bits fit in 8), r/m byte with $secondary, then the constant.
enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
  // Emit primary opcode and set sign-extend bit
  // Check for 8-bit immediate, and set sign extend bit in opcode
  int con = (int)$imm$$constant; // Throw away top bits
  emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
  // Emit r/m byte with secondary opcode, after primary opcode.
  emit_rm(masm, 0x3, $secondary, $dst$$reg);
  if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
  else                               emit_d32(masm,con);
%}

// High word of a long-immediate op: like Long_OpcSErm_Lo, but uses the
// $tertiary opcode field and the high register of the pair.
enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
  // Emit primary opcode and set sign-extend bit
  // Check for 8-bit immediate, and set sign extend bit in opcode
  int con = (int)($imm$$constant >> 32); // Throw away bottom bits
  emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
  // Emit r/m byte with tertiary opcode, after primary opcode.
  emit_rm(masm, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
  if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
  else                               emit_d32(masm,con);
%}

enc_class OpcSReg (rRegI dst) %{    // BSWAP
  emit_cc(masm, $secondary, $dst$$reg );
%}

// Byte-swap a 64-bit value held in a register pair: bswap each half,
// then exchange the halves.
enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
  int destlo = $dst$$reg;
  int desthi = HIGH_FROM_LOW_ENC(destlo);
  // bswap lo
  emit_opcode(masm, 0x0F);
  emit_cc(masm, 0xC8, destlo);
  // bswap hi
  emit_opcode(masm, 0x0F);
  emit_cc(masm, 0xC8, desthi);
  // xchg lo and hi
  emit_opcode(masm, 0x87);
  emit_rm(masm, 0x3, destlo, desthi);
%}

enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
  emit_rm(masm, 0x3, $secondary, $div$$reg );
%}

enc_class enc_cmov(cmpOp cop ) %{ // CMOV
  $$$emit8$primary;
  emit_cc(masm, $secondary, $cop$$cmpcode);
%}

enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
  int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
  emit_d8(masm, op >> 8 );
  emit_d8(masm, op & 255);
%}

// emulate a CMOV with a conditional branch around a MOV
enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
  // Invert sense of branch from sense of CMOV
  emit_cc( masm, 0x70, ($cop$$cmpcode^1) );
  emit_d8( masm, $brOffs$$constant );
%}

enc_class enc_PartialSubtypeCheck( ) %{
  Register Redi = as_Register(EDI_enc); // result register
  Register Reax = as_Register(EAX_enc); // super class
  Register Recx = as_Register(ECX_enc); // killed
  Register Resi = as_Register(ESI_enc); // sub class
  Label miss;

  // NB: Callers may assume that, when $result is a valid register,
  // check_klass_subtype_slow_path sets it to a nonzero value.
  __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                   nullptr, &miss,
                                   /*set_cond_codes:*/ true);
  if ($primary) {
    __ xorptr(Redi, Redi);
  }
  __ bind(miss);
%}

// Leave the x87 stack empty before a call; records/asserts its own emitted
// size so all occurrences are the same length.
enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
  int start = __ offset();
  if (UseSSE >= 2) {
    if (VerifyFPU) {
      __ verify_FPU(0, "must be empty in SSE2+ mode");
    }
  } else {
    // External c_calling_convention expects the FPU stack to be 'clean'.
    // Compiled code leaves it dirty.  Do cleanup now.
    __ empty_FPU_stack();
  }
  if (sizeof_FFree_Float_Stack_All == -1) {
    sizeof_FFree_Float_Stack_All = __ offset() - start;
  } else {
    assert(__ offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
  }
%}

enc_class Verify_FPU_For_Leaf %{
  if( VerifyFPU ) {
    __ verify_FPU( -3, "Returning from Runtime Leaf call");
  }
%}

enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
  // This is the instruction starting address for relocation info.
  __ set_inst_mark();
  $$$emit8$primary;
  // CALL directly to the runtime
  emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                 runtime_call_Relocation::spec(), RELOC_IMM32 );
  __ clear_inst_mark();
  __ post_call_nop();

  if (UseSSE >= 2) {
    BasicType rt = tf()->return_type();

    if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
      // A C runtime call where the return value is unused.  In SSE2+
      // mode the result needs to be removed from the FPU stack.  It's
      // likely that this function call could be removed by the
      // optimizer if the C function is a pure function.
      __ ffree(0);
    } else if (rt == T_FLOAT) {
      // Move the x87 float result into xmm0 via a stack temp.
      __ lea(rsp, Address(rsp, -4));
      __ fstp_s(Address(rsp, 0));
      __ movflt(xmm0, Address(rsp, 0));
      __ lea(rsp, Address(rsp, 4));
    } else if (rt == T_DOUBLE) {
      // Move the x87 double result into xmm0 via a stack temp.
      __ lea(rsp, Address(rsp, -8));
      __ fstp_d(Address(rsp, 0));
      __ movdbl(xmm0, Address(rsp, 0));
      __ lea(rsp, Address(rsp, 8));
    }
  }
%}

enc_class pre_call_resets %{
  // If method sets FPU control word restore it here
  debug_only(int off0 = __ offset());
  if (ra_->C->in_24_bit_fp_mode()) {
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }
  // Clear upper bits of YMM registers when current compiled code uses
  // wide vectors to avoid AVX <-> SSE transition penalty during call.
  __ vzeroupper();
  debug_only(int off1 = __ offset());
  assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
%}

enc_class post_call_FPU %{
  // If method sets FPU control word do it here also
  if (Compile::current()->in_24_bit_fp_mode()) {
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
  }
%}

enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
  // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
  // who we intended to call.
  __ set_inst_mark();
  $$$emit8$primary;

  if (!_method) {
    // Runtime stub call: plain runtime relocation.
    emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                   runtime_call_Relocation::spec(),
                   RELOC_IMM32);
    __ clear_inst_mark();
    __ post_call_nop();
  } else {
    int method_index = resolved_method_index(masm);
    RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                : static_call_Relocation::spec(method_index);
    emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                   rspec, RELOC_DISP32);
    __ post_call_nop();
    address mark = __ inst_mark();
    if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
      // Calls of the same statically bound method can share
      // a stub to the interpreter.
      __ code()->shared_stub_to_interp_for(_method, __ code()->insts()->mark_off());
      __ clear_inst_mark();
    } else {
      // Emit stubs for static call.
      address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
      __ clear_inst_mark();
      if (stub == nullptr) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
  }
%}

enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
  __ ic_call((address)$meth$$method, resolved_method_index(masm));
  __ post_call_nop();
%}

enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
  int disp = in_bytes(Method::from_compiled_offset());
  assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

  // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
  __ set_inst_mark();
  $$$emit8$primary;
  emit_rm(masm, 0x01, $secondary, EAX_enc );  // R/M byte
  emit_d8(masm, disp);                        // Displacement
  __ clear_inst_mark();
  __ post_call_nop();
%}

enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
  $$$emit8$primary;
  emit_rm(masm, 0x3, $secondary, $dst$$reg);
  $$$emit8$shift$$constant;
%}

enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  emit_opcode(masm, 0xB8 + $dst$$reg);
  $$$emit32$src$$constant;
%}

enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  emit_opcode(masm, $primary + $dst$$reg);
  $$$emit32$src$$constant;
%}

enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  int dst_enc = $dst$$reg;
  int src_con = $src$$constant & 0x0FFFFFFFFL;
  if (src_con == 0) {
    // xor dst, dst
    emit_opcode(masm, 0x33);
    emit_rm(masm, 0x3, dst_enc, dst_enc);
  } else {
    emit_opcode(masm, $primary + dst_enc);
    emit_d32(masm, src_con);
  }
%}

enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  int dst_enc = $dst$$reg + 2;
  int src_con = ((julong)($src$$constant)) >> 32;
  if (src_con == 0) {
    // xor dst, dst
    emit_opcode(masm, 0x33);
    emit_rm(masm, 0x3, dst_enc, dst_enc);
  } else {
    emit_opcode(masm, $primary + dst_enc);
    emit_d32(masm, src_con);
  }
%}


// Encode a reg-reg copy.  If it is useless, then empty encoding.
enc_class enc_Copy( rRegI dst, rRegI src ) %{
  encode_Copy( masm, $dst$$reg, $src$$reg );
%}

enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
  encode_Copy( masm, $dst$$reg, $src$$reg );
%}

// NOTE(review): duplicates the RegReg enc_class defined earlier in this
// encode block — confirm ADLC's handling of the redefinition.
enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
  emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
%}

enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
  $$$emit8$primary;
  emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
%}

enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
  $$$emit8$secondary;
  emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
%}

enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
  emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
%}

enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
  emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
%}

enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
  emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
%}

enc_class Con32 (immI src) %{    // Con32(storeImmI)
  // Output immediate
  $$$emit32$src$$constant;
%}

enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
  // Output Float immediate bits
  jfloat jf = $src$$constant;
  int    jf_as_bits = jint_cast( jf );
  emit_d32(masm, jf_as_bits);
%}

enc_class Con32F_as_bits(immF src) %{      // storeX_imm
  // Output Float immediate bits
  jfloat jf = $src$$constant;
  int    jf_as_bits = jint_cast( jf );
  emit_d32(masm, jf_as_bits);
%}

enc_class Con16 (immI src) %{    // Con16(storeImmI)
  // Output immediate
  $$$emit16$src$$constant;
%}

enc_class Con_d32(immI src) %{
  emit_d32(masm,$src$$constant);
%}

enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
  // Output immediate memory reference
  emit_rm(masm, 0x00, $t1$$reg, 0x05 );
  emit_d32(masm, 0x00);
%}

enc_class lock_prefix( ) %{
  emit_opcode(masm,0xF0);    // [Lock]
%}

// Cmp-xchg long value.
// Note: we need to swap rbx, and rcx before and after the
//       cmpxchg8 instruction because the instruction uses
//       rcx as the high order word of the new value to store but
//       our register encoding uses rbx,.
enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

  // XCHG  rbx,ecx
  emit_opcode(masm,0x87);
  emit_opcode(masm,0xD9);
  // [Lock]
  emit_opcode(masm,0xF0);
  // CMPXCHG8 [Eptr]
  emit_opcode(masm,0x0F);
  emit_opcode(masm,0xC7);
  emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
  // XCHG  rbx,ecx
  emit_opcode(masm,0x87);
  emit_opcode(masm,0xD9);
%}

enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(masm,0xF0);

  // CMPXCHG [Eptr]
  emit_opcode(masm,0x0F);
  emit_opcode(masm,0xB1);
  emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
%}

enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(masm,0xF0);

  // CMPXCHGB [Eptr]
  emit_opcode(masm,0x0F);
  emit_opcode(masm,0xB0);
  emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
%}

enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(masm,0xF0);

  // 16-bit mode
  emit_opcode(masm, 0x66);

  // CMPXCHGW [Eptr]
  emit_opcode(masm,0x0F);
  emit_opcode(masm,0xB1);
  emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
%}

// Materialize the ZF!=0 condition as a 0/1 boolean in 'res'.
enc_class enc_flags_ne_to_boolean( iRegI res ) %{
  int res_encoding = $res$$reg;

  // MOV  res,0
  emit_opcode( masm, 0xB8 + res_encoding);
  emit_d32( masm, 0 );
  // JNE,s  fail
  emit_opcode(masm,0x75);
  emit_d8(masm, 5 );
  // MOV  res,1
  emit_opcode( masm, 0xB8 + res_encoding);
  emit_d32( masm, 1 );
  // fail:
%}

enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = $ereg$$reg;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc();
  encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp + 4;      // Offset is 4 further in memory
  assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
  encode_RegMem(masm, reg_encoding, base, index, scale, displace, relocInfo::none);
%}

// Double-width shift (SHLD/SHRD chosen by $tertiary) plus single shift,
// for long shifts by a constant in 1..31.
enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
  int r1, r2;
  if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
  emit_opcode(masm,0x0F);
  emit_opcode(masm,$tertiary);
  emit_rm(masm, 0x3, r1, r2);
  emit_d8(masm,$cnt$$constant);
  emit_d8(masm,$primary);
  emit_rm(masm, 0x3, $secondary, r1);
  emit_d8(masm,$cnt$$constant);
%}

// Arithmetic long shift right by a constant in 32..63.
enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
  emit_opcode( masm, 0x8B ); // Move
  emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_d8(masm,$primary);
    emit_rm(masm, 0x3, $secondary, $dst$$reg);
    emit_d8(masm,$cnt$$constant-32);
  }
  emit_d8(masm,$primary);
  emit_rm(masm, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
  emit_d8(masm,31);
%}

// Logical long shift (left or right, chosen by $secondary) by a constant
// in 32..63; the vacated half is cleared.
enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
  int r1, r2;
  if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }

  emit_opcode( masm, 0x8B ); // Move r1,r2
  emit_rm(masm, 0x3, r1, r2);
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_opcode(masm,$primary);
    emit_rm(masm, 0x3, $secondary, r1);
    emit_d8(masm,$cnt$$constant-32);
  }
  emit_opcode(masm,0x33);    // XOR r2,r2
  emit_rm(masm, 0x3, r2, r2);
%}

// Clone of RegMem but accepts an extra parameter to access each
// half of a double in memory; it never needs relocation info.
enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
  emit_opcode(masm,$opcode$$constant);
  int reg_encoding = $rm_reg$$reg;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp + $disp_for_half$$constant;
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
//
// Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
// and it never needs relocation information.
// Frequently used to move data between FPU's Stack Top and memory.
enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
  encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
%}

enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
%}

enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
  int reg_encoding = $dst$$reg;
  int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
  int index        = 0x04;            // 0x04 indicates no index
  int scale        = 0x00;            // 0x00 indicates no scale
  int displace     = $src1$$constant; // 0x00 indicates no displacement
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
  // Compare dst,src
  emit_opcode(masm,0x3B);
  emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  // jmp dst < src around move
  emit_opcode(masm,0x7C);
  emit_d8(masm,2);
  // move dst,src
  emit_opcode(masm,0x8B);
  emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
%}

enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
  // Compare dst,src
  emit_opcode(masm,0x3B);
  emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  // jmp dst > src around move
  emit_opcode(masm,0x7F);
  emit_d8(masm,2);
  // move dst,src
  emit_opcode(masm,0x8B);
  emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
%}

enc_class enc_FPR_store(memory mem, regDPR src) %{
  // If src is FPR1, we can just FST to store it.
  // Else we need to FLD it to FPR1, then FSTP to store/pop it.
  int reg_encoding = 0x2; // Just store
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  if( $src$$reg != FPR1L_enc ) {
    reg_encoding = 0x3;          // Store & pop
    emit_opcode( masm, 0xD9 );   // FLD (i.e., push it)
    emit_d8( masm, 0xC0-1+$src$$reg );
  }
  __ set_inst_mark();       // Mark start of opcode for reloc info in mem operand
  emit_opcode(masm,$primary);
  encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
  __ clear_inst_mark();
%}

enc_class neg_reg(rRegI dst) %{
  // NEG $dst
  emit_opcode(masm,0xF7);
  emit_rm(masm, 0x3, 0x03, $dst$$reg );
%}

enc_class setLT_reg(eCXRegI dst) %{
  // SETLT $dst
  emit_opcode(masm,0x0F);
  emit_opcode(masm,0x9C);
  emit_rm( masm, 0x3, 0x4, $dst$$reg );
%}

// Branch-free p += (p < q) ? y : 0 via SUB/SBB mask/AND/ADD.
enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
  int tmpReg = $tmp$$reg;

  // SUB $p,$q
  emit_opcode(masm,0x2B);
  emit_rm(masm, 0x3, $p$$reg, $q$$reg);
  // SBB $tmp,$tmp
  emit_opcode(masm,0x1B);
  emit_rm(masm, 0x3, tmpReg, tmpReg);
  // AND $tmp,$y
  emit_opcode(masm,0x23);
  emit_rm(masm, 0x3, tmpReg, $y$$reg);
  // ADD $p,$tmp
  emit_opcode(masm,0x03);
  emit_rm(masm, 0x3, $p$$reg, tmpReg);
%}

// Variable long shift left by ECX; counts >= 32 move lo into hi and clear lo.
enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(masm,0xF7);
  emit_rm(masm, 0x3, 0, ECX_enc);
  emit_d32(masm,0x20);
  // JEQ,s small
  emit_opcode(masm, 0x74);
  emit_d8(masm, 0x04);
  // MOV $dst.hi,$dst.lo
  emit_opcode( masm, 0x8B );
  emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
  // CLR $dst.lo
  emit_opcode(masm, 0x33);
  emit_rm(masm, 0x3, $dst$$reg, $dst$$reg);
// small:
  // SHLD $dst.hi,$dst.lo,$shift
  emit_opcode(masm,0x0F);
  emit_opcode(masm,0xA5);
  emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
  // SHL $dst.lo,$shift"
  emit_opcode(masm,0xD3);
  emit_rm(masm, 0x3, 0x4, $dst$$reg );
%}

// Variable logical long shift right by ECX; counts >= 32 move hi into lo
// and clear hi.
enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(masm,0xF7);
  emit_rm(masm, 0x3, 0, ECX_enc);
  emit_d32(masm,0x20);
  // JEQ,s small
  emit_opcode(masm, 0x74);
  emit_d8(masm, 0x04);
  // MOV $dst.lo,$dst.hi
  emit_opcode( masm, 0x8B );
  emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
  // CLR $dst.hi
  emit_opcode(masm, 0x33);
  emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
// small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(masm,0x0F);
  emit_opcode(masm,0xAD);
  emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
  // SHR $dst.hi,$shift"
  emit_opcode(masm,0xD3);
  emit_rm(masm, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
%}

// Variable arithmetic long shift right by ECX; counts >= 32 move hi into lo
// and sign-fill hi.
enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(masm,0xF7);
  emit_rm(masm, 0x3, 0, ECX_enc);
  emit_d32(masm,0x20);
  // JEQ,s small
  emit_opcode(masm, 0x74);
  emit_d8(masm, 0x05);
  // MOV $dst.lo,$dst.hi
  emit_opcode( masm, 0x8B );
  emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
  // SAR $dst.hi,31
  emit_opcode(masm, 0xC1);
  emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
  emit_d8(masm, 0x1F );
// small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(masm,0x0F);
  emit_opcode(masm,0xAD);
  emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
  // SAR $dst.hi,$shift"
  emit_opcode(masm,0xD3);
  emit_rm(masm, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
%}


// ----------------- Encodings for floating point unit -----------------
// May leave result in FPU-TOS or FPU reg depending on opcodes
enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
  $$$emit8$primary;
  emit_rm(masm, 0x3, $secondary, $src$$reg );
%}

// Pop argument in FPR0 with FSTP ST(0)
enc_class PopFPU() %{
  emit_opcode( masm, 0xDD );
  emit_d8( masm, 0xD8 );
%}

// !!!!! equivalent to Pop_Reg_F
enc_class Pop_Reg_DPR( regDPR dst ) %{
  emit_opcode( masm, 0xDD );           // FSTP ST(i)
  emit_d8( masm, 0xD8+$dst$$reg );
%}

enc_class Push_Reg_DPR( regDPR dst ) %{
  emit_opcode( masm, 0xD9 );
  emit_d8( masm, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
%}

enc_class strictfp_bias1( regDPR dst ) %{
  emit_opcode( masm, 0xDB );           // FLD m80real
  emit_opcode( masm, 0x2D );
  emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
  emit_opcode( masm, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( masm, 0xC8+$dst$$reg );
%}

enc_class strictfp_bias2( regDPR dst ) %{
  emit_opcode( masm, 0xDB );           // FLD m80real
  emit_opcode( masm, 0x2D );
  emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
  emit_opcode( masm, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( masm, 0xC8+$dst$$reg );
%}

// Special case for moving an integer register to a stack slot.
enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
  store_to_stackslot( masm, $primary, $src$$reg, $dst$$disp );
%}

// Special case for moving a register to a stack slot.
enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
  // Opcode already emitted
  emit_rm( masm, 0x02, $src$$reg, ESP_enc );   // R/M byte
  emit_rm( masm, 0x00, ESP_enc, ESP_enc);      // SIB byte
  emit_d32(masm, $dst$$disp);                  // Displacement
%}

// Push the integer in stackSlot 'src' onto FP-stack
enc_class Push_Mem_I( memory src ) %{    // FILD [ESP+src]
  store_to_stackslot( masm, $primary, $secondary, $src$$disp );
%}

// Push FPU's TOS float to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
  store_to_stackslot( masm, 0xD9, 0x03, $dst$$disp );
%}

// Same as Pop_Mem_F except for opcode
// Push FPU's TOS double to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
  store_to_stackslot( masm, 0xDD, 0x03, $dst$$disp );
%}

enc_class Pop_Reg_FPR( regFPR dst ) %{
  emit_opcode( masm, 0xDD );           // FSTP ST(i)
  emit_d8( masm, 0xD8+$dst$$reg );
%}

enc_class Push_Reg_FPR( regFPR dst ) %{
  emit_opcode( masm, 0xD9 );           // FLD ST(i-1)
  emit_d8( masm, 0xC0-1+$dst$$reg );
%}

// Push FPU's float to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
  int pop = 0x02;                      // FST (no pop) when src is already TOS
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( masm, 0xD9 );         // FLD ST(i-1)
    emit_d8( masm, 0xC0-1+$src$$reg );
    pop = 0x03;                        // FSTP to undo the extra FLD
  }
  store_to_stackslot( masm, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst]
%}

// Push FPU's double to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
  int pop = 0x02;                      // FST (no pop) when src is already TOS
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( masm, 0xD9 );         // FLD ST(i-1)
    emit_d8( masm, 0xC0-1+$src$$reg );
    pop = 0x03;                        // FSTP to undo the extra FLD
  }
  store_to_stackslot( masm, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst]
%}

// Push FPU's double to a FPU-stack-slot, and pop FPU-stack
enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
  int pop = 0xD0 - 1; // -1 since we skip FLD
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( masm, 0xD9 );         // FLD ST(src-1)
    emit_d8( masm, 0xC0-1+$src$$reg );
    pop = 0xD8;
  }
  emit_opcode( masm, 0xDD );
  emit_d8( masm, pop+$dst$$reg );      // FST<P> ST(i)
%}


enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
  // load dst in FPR0
  emit_opcode( masm, 0xD9 );
  emit_d8( masm, 0xC0-1+$dst$$reg );
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (masm, 0xD9);
    emit_opcode (masm, 0xF7);
    // swap src with FPR1:
    // FXCH FPR1 with src
    emit_opcode(masm, 0xD9);
    emit_d8(masm, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (masm, 0xD9);
    emit_opcode (masm, 0xF6);
  }
%}

// Spill both XMM doubles through a stack temp and load them onto the x87
// stack (src1 first, then src0 on top).
enc_class Push_ModD_encoding(regD src0, regD src1) %{
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_d(Address(rsp, 0));
  __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Float variant of Push_ModD_encoding.
enc_class Push_ModF_encoding(regF src0, regF src1) %{
  __ subptr(rsp, 4);
  __ movflt(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_s(Address(rsp, 0));
  __ movflt(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_s(Address(rsp, 0));
%}

// Pop the x87 result through the stack temp into an XMM double and release
// the temp.
enc_class Push_ResultD(regD dst) %{
  __ fstp_d(Address(rsp, 0));
  __ movdbl($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, 8);
%}

// Float variant; the temp size released is given by $d8.
enc_class Push_ResultF(regF dst, immI d8) %{
  __ fstp_s(Address(rsp, 0));
  __ movflt($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, $d8$$constant);
%}

enc_class Push_SrcD(regD src) %{
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

enc_class push_stack_temp_qword() %{
  __ subptr(rsp, 8);
%}
enc_class pop_stack_temp_qword() %{
  __ addptr(rsp, 8);
%}

// Store an XMM double into the existing stack temp and load it onto the
// x87 stack.
enc_class push_xmm_to_fpr1(regD src) %{
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

enc_class Push_Result_Mod_DPR( regDPR src) %{
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (masm, 0xD9);
    emit_opcode (masm, 0xF7);
    // FXCH FPR1 with src
    emit_opcode(masm, 0xD9);
    emit_d8(masm, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (masm, 0xD9);
    emit_opcode (masm, 0xF6);
  }
%}

enc_class fnstsw_sahf_skip_parity() %{
  // fnstsw ax
  emit_opcode( masm, 0xDF );
  emit_opcode( masm, 0xE0 );
  // sahf
  emit_opcode( masm, 0x9E );
  // jnp ::skip
  emit_opcode( masm, 0x7B );
  emit_opcode( masm, 0x05 );
%}

enc_class emitModDPR() %{
  // fprem must be iterative
  // :: loop
  // fprem
  emit_opcode( masm, 0xD9 );
  emit_opcode( masm, 0xF8 );
  // wait
  emit_opcode( masm, 0x9b );
  // fnstsw ax
  emit_opcode( masm, 0xDF );
  emit_opcode( masm, 0xE0 );
  // sahf
  emit_opcode( masm, 0x9E );
  // jp ::loop
  emit_opcode( masm, 0x0F );
  emit_opcode( masm, 0x8A );
  emit_opcode( masm, 0xF4 );
  emit_opcode( masm, 0xFF );
  emit_opcode( masm, 0xFF );
  emit_opcode( masm, 0xFF );
%}

// Transfer x87 status flags to EFLAGS, treating unordered (C2 set) as the
// LT case.
enc_class fpu_flags() %{
  // fnstsw_ax
  emit_opcode( masm, 0xDF);
  emit_opcode( masm, 0xE0);
  // test ax,0x0400
  emit_opcode( masm, 0x66 );   // operand-size prefix for 16-bit immediate
  emit_opcode( masm, 0xA9 );
  emit_d16   ( masm, 0x0400 );
  // // // This sequence works, but stalls for 12-16 cycles on PPro
  // // test rax,0x0400
  // emit_opcode( masm, 0xA9 );
  // emit_d32  ( masm, 0x00000400 );
  //
  // jz exit (no unordered comparison)
  emit_opcode( masm, 0x74 );
  emit_d8    ( masm, 0x02 );
  // mov ah,1 - treat as LT case (set carry flag)
  emit_opcode( masm, 0xB4 );
  emit_d8    ( masm, 0x01 );
  // sahf
  emit_opcode( masm, 0x9E);
%}

enc_class cmpF_P6_fixup() %{
  // Fixup the integer flags in case comparison involved a NaN
  //
  // JNP exit (no unordered comparison, P-flag is set by NaN)
  emit_opcode( masm, 0x7B );
  emit_d8    ( masm, 0x03 );
  // MOV AH,1 - treat as LT case (set carry flag)
  emit_opcode( masm, 0xB4 );
  emit_d8    ( masm, 0x01 );
  // SAHF
  emit_opcode( masm, 0x9E);
  // NOP       // target for branch to avoid branch to branch
  emit_opcode( masm, 0x90);
%}

// Pseudocode for the CmpF_Result encoding below:
// fnstsw_ax();
// sahf();
// movl(dst, nan_result);
// jcc(Assembler::parity, exit);
// movl(dst, less_result);
// jcc(Assembler::below, exit);
// movl(dst, equal_result);
// jcc(Assembler::equal, exit);
// movl(dst, greater_result);

// less_result    =  1;
// greater_result = -1;
// equal_result   =  0;
// nan_result     = -1;

enc_class CmpF_Result(rRegI dst) %{
  // fnstsw_ax();
  emit_opcode( masm, 0xDF);
  emit_opcode( masm, 0xE0);
  // sahf
  emit_opcode( masm, 0x9E);
  // movl(dst, nan_result);
  emit_opcode( masm, 0xB8 + $dst$$reg);
  emit_d32( masm, -1 );
  // jcc(Assembler::parity, exit);
  emit_opcode( masm, 0x7A );
  emit_d8    ( masm, 0x13 );
  // movl(dst, less_result);
  emit_opcode( masm, 0xB8 + $dst$$reg);
  emit_d32( masm, -1 );
  // jcc(Assembler::below, exit);
  emit_opcode( masm, 0x72 );
  emit_d8    ( masm, 0x0C );
  // movl(dst, equal_result);
  emit_opcode( masm, 0xB8 + $dst$$reg);
  emit_d32( masm, 0 );
  // jcc(Assembler::equal, exit);
  emit_opcode( masm, 0x74 );
  emit_d8    ( masm, 0x05 );
  // movl(dst, greater_result);
  emit_opcode( masm, 0xB8 + $dst$$reg);
  emit_d32( masm, 1 );
%}


// Compare the longs and set flags
// BROKEN!
  // Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
    // JNE,s  done
    emit_opcode(masm,0x75);
    emit_d8(masm, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
  // done:
  %}

  // Sign-extend a 32-bit int into a long register pair: copy into lo and
  // hi, then arithmetic-shift hi right by 31 to replicate the sign bit.
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( masm, dst_encoding , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( masm, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( masm, 0xC1 );
    emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
    emit_d8(masm, 0x1F );
  %}

  // Push the long onto the CPU stack and FILD it onto the x87 stack,
  // then pop the two temp words back off the CPU stack.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(masm, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(masm,0xdf);
    emit_d8(masm, 0x6C);
    emit_d8(masm, 0x24);
    emit_d8(masm, 0x00);
    // pop stack
    emit_opcode(masm, 0x83); // add SP, #8
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, 0x8);
  %}

  // (int1 * int2) >> cnt for 32 <= cnt <= 63: take the high half of the
  // widening multiply in EDX, then shift by the remaining (cnt - 32) bits.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL EDX:EAX,$src1
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x5, $src1$$reg );
    // SAR EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(masm, 0xC1);
      emit_rm(masm, 0x3, 7, $dst$$reg );
      emit_d8(masm, shift_count);
    }
  %}

  // this version doesn't have add sp, 8 (the caller pops the temp words)
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(masm, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(masm,0xdf);
    emit_d8(masm, 0x6C);
    emit_d8(masm, 0x24);
    emit_d8(masm, 0x00);
  %}

  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x5, $src$$reg);
  %}

  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x4, $src$$reg);
  %}

  // Full 64x64->64 multiply using 32-bit pieces; dst is EDX:EAX.
  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( masm, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( masm, 0x0F );
    emit_opcode( masm, 0xAF );
    emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( masm, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( masm, 0x0F );
    emit_opcode( masm, 0xAF );
    emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( masm, 0x03 );
    emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // MUL    EDX:EAX,$src.lo
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp  (cross terms into the high word)
    emit_opcode( masm, 0x03 );
    emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
  %}

  // Multiply a long by a small (0..127) constant; dst is EDX:EAX.
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( masm, 0x6B );
    emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    emit_d8( masm, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(masm, 0xB8 + EDX_enc);
    emit_d32( masm, (int)$src$$constant );
    // MUL    EDX:EAX,EDX
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
    emit_opcode( masm, 0x03 );
    emit_rm( masm, 0x3, EDX_enc, $tmp$$reg );
  %}

  // Long division via a runtime call: push both operands (4 words) and
  // call SharedRuntime::ldiv, then pop the argument area.
  // NOTE(review): HIGH_FROM_LOW_ENC is applied to the whole 0x50+reg PUSH
  // byte here; presumably it is a simple +2 offset so this is equivalent to
  // 0x50+HIGH_FROM_LOW_ENC(reg) as written elsewhere — confirm if changed.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(masm,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(masm,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);       // Call into runtime
    emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Restore stack
    emit_opcode(masm, 0x83); // add SP, #framesize
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, 4*4);
  %}

  // Long remainder via a runtime call; identical shape to long_div but
  // calls SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(masm,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(masm,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);       // Call into runtime
    emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Restore stack
    emit_opcode(masm, 0x83); // add SP, #framesize
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, 4*4);
  %}

  // Compare a long against zero: OR the halves together so ZF reflects
  // whether the full 64-bit value is zero.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(masm, 0x8B);
    emit_rm(masm, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(masm, 0x0B);
    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
  %}

  // Long equality compare: compare lo halves, and only if they are equal
  // fall through to compare the hi halves.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(masm, 0x70, 0x5);
    emit_d8(masm,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
  %}

  // Ordered long compare: CMP low halves, then SBB the high halves so the
  // borrow chains through and EFLAGS describe the full 64-bit compare.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
    // SBB    $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( masm, 0x1B );
    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
  %}

  // Ordered compare of a long against zero (0 - src) using CMP/SBB.
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(masm,0x33);  // XOR
    emit_rm(masm,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( masm, 0x1B );
    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
  %}

  // Sniff, sniff...
  // smells like Gnu Superoptimizer
  // Two's-complement negate of a 64-bit register pair without branches:
  // NEG hi; NEG lo; then SBB hi,0 to propagate the borrow from the low word.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(masm,0xF7);    // NEG hi
    emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_opcode(masm,0xF7);    // NEG lo
    emit_rm    (masm,0x3, 0x3,  $dst$$reg );
    emit_opcode(masm,0x83);    // SBB hi,0
    emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_d8    (masm,0 );
  %}

  // POP EDX
  enc_class enc_pop_rdx() %{
    emit_opcode(masm,0x5A);
  %}

  // Tail-jump into the rethrow stub.
  enc_class enc_rethrow() %{
    __ set_inst_mark();
    emit_opcode(masm, 0xE9);        // jmp    entry
    emit_d32_reloc(masm, (int)OptoRuntime::rethrow_stub() - ((int)__ pc())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // thrown an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(masm,0xD9);            // FLDCW  trunc
    emit_opcode(masm,0x2D);
    emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(masm,0x83);            // SUB ESP,4
    emit_opcode(masm,0xEC);
    emit_d8(masm,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(masm,0xDB);            // FISTP [ESP]
    emit_opcode(masm,0x1C);
    emit_d8(masm,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(masm,0xD9);            // FLDCW std/24-bit mode
    emit_opcode(masm,0x2D);
    emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(masm,0x58);            // POP EAX
    emit_opcode(masm,0x3D);            // CMP EAX,imm
    emit_d32   (masm,0x80000000);      //         0x80000000
    emit_opcode(masm,0x75);            // JNE around_slow_call
    emit_d8    (masm,0x07);            // Size of slow_call
    // 0x80000000 is the x87 "integer indefinite" pattern: NaN or
    // out-of-range input — redo the conversion via the runtime wrapper.
    // Push src onto stack slow-path
    emit_opcode(masm,0xD9 );           // FLD     ST(i)
    emit_d8    (masm,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);       // Call into runtime
    emit_d32_reloc(masm, (StubRoutines::x86::d2i_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Carry on here...
  %}

  // Convert a double to a long; same trunc-mode trick as DPR2I_encoding
  // but stores 64 bits and checks EDX:EAX for the 0x8000000000000000
  // "indefinite" pattern before taking the slow path.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(masm,0xD9);            // FLDCW  trunc
    emit_opcode(masm,0x2D);
    emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes) for the long result
    emit_opcode(masm,0x83);            // SUB ESP,8
    emit_opcode(masm,0xEC);
    emit_d8(masm,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(masm,0xDF);            // FISTP [ESP]
    emit_opcode(masm,0x3C);
    emit_d8(masm,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(masm,0xD9);            // FLDCW std/24-bit mode
    emit_opcode(masm,0x2D);
    emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted long; adjust CPU stack
    emit_opcode(masm,0x58);            // POP EAX
    emit_opcode(masm,0x5A);            // POP EDX
    emit_opcode(masm,0x81);            // CMP EDX,imm
    emit_d8    (masm,0xFA);            // rdx
    emit_d32   (masm,0x80000000);      //         0x80000000
    emit_opcode(masm,0x75);            // JNE around_slow_call
    emit_d8    (masm,0x07+4);          // Size of slow_call
    emit_opcode(masm,0x85);            // TEST EAX,EAX
    emit_opcode(masm,0xC0);            // 2/rax,/rax,
    emit_opcode(masm,0x75);            // JNE around_slow_call
    emit_d8    (masm,0x07);            // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(masm,0xD9 );           // FLD     ST(i)
    emit_d8    (masm,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);       // Call into runtime
    emit_d32_reloc(masm, (StubRoutines::x86::d2l_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Carry on here...
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */   (non-popping add into ST)
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(masm, 0xDE);
    emit_opcode(masm, 0xC0 + $src2$$reg);
  %}

  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
      // FSUB   ST,$src1
      emit_opcode(masm, 0xD8);
      emit_opcode(masm, 0xE0 + $src1$$reg);

      // FDIV
      emit_opcode(masm, 0xD8);
      emit_opcode(masm, 0xF0 + $src2$$reg);
  %}

  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC8 + $src2$$reg);
  %}


  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(masm, 0xDE);
    emit_opcode(masm, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long: a 64-bit FILD from memory followed
  // by a 64-bit FISTP into the destination stack slot.
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(masm,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( masm, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( masm, 0x0DF, 0x05, $src$$disp );
    __ set_inst_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(masm,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    __ clear_inst_mark();
  %}

%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             (to get allocators register number
//  G  Owned by |        |     v    add OptoReg::stack0())
//  r   CALLER  |        |
//  o     |     +--------+      pad to even-align allocators stack-slot
//  w     V     |  pad0  |        numbers; owned by CALLER
//  t -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^     |   in   |  5
//        |     |  args  |  4   Holes in incoming args owned by SELF
//        |     |        |  3
//        |     +--------+
//        V     | old out|      Empty on Intel, window on Sparc
//        | old |preserve|      Must be even aligned.
//        | SP-+--------+----> Matcher::_old_SP, even aligned
//        |     |   in   |  3   area for Intel ret address
//     Owned by |preserve|      Empty on Sparc.
//      SELF  +--------+
//        |     |  pad2  |  2   pad to align old SP
//        |     +--------+  1
//        |     | locks  |  0
//        |     +--------+----> OptoReg::stack0(), even aligned
//        |     | pad1   | 11   pad to align new SP
//        |     +--------+
//        |     |        | 10
//        |     | spills |  9   spills
//        V     |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^     |  out   |  7
//        |     |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by   +--------+
//    CALLEE    | new out|  6   Empty on Intel, window on Sparc
//        | new |preserve|      Must be even-aligned.
//        | SP-+--------+----> Matcher::_new_SP, even aligned
//        |     |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    // Indexed by ideal register type; FP results live on the x87 stack.
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // NOTE(review): unlike c_return_value, floats are returned in XMM0
  // already at UseSSE>=1 here — presumably intentional, since the Java
  // (compiled) convention can use SSE1 for floats while the C convention
  // cannot; confirm before unifying the two thresholds.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Signed 8-bit immediate
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned 8-bit immediate
operand immU8() %{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Signed 16-bit immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Null Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: fits in a sign-extended 32-bit immediate
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.
// Zero and not -0.0 (bit pattern compare excludes the negative zero)
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Opmask register operand (AVX-512 vector mask)
operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register (registers with byte-addressable low halves)
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Any integer register except EAX and EDX (the implicit mul/div pair)
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Any integer register except ECX (the implicit shift-count register)
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(esi_reg));
   match(reg);
   match(rRegI);

   format %{ "ESI" %}
   interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand rRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
3708 match(eAXRegP); 3709 match(eBXRegP); 3710 match(eCXRegP); 3711 match(eDIRegP); 3712 3713 format %{ %} 3714 interface(REG_INTER); 3715 %} 3716 3717 // On windows95, EBP is not safe to use for implicit null tests. 3718 operand eRegP_no_EBP() %{ 3719 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3720 match(RegP); 3721 match(eAXRegP); 3722 match(eBXRegP); 3723 match(eCXRegP); 3724 match(eDIRegP); 3725 3726 op_cost(100); 3727 format %{ %} 3728 interface(REG_INTER); 3729 %} 3730 3731 operand pRegP() %{ 3732 constraint(ALLOC_IN_RC(p_reg)); 3733 match(RegP); 3734 match(eBXRegP); 3735 match(eDXRegP); 3736 match(eSIRegP); 3737 match(eDIRegP); 3738 3739 format %{ %} 3740 interface(REG_INTER); 3741 %} 3742 3743 // Special Registers 3744 // Return a pointer value 3745 operand eAXRegP(eRegP reg) %{ 3746 constraint(ALLOC_IN_RC(eax_reg)); 3747 match(reg); 3748 format %{ "EAX" %} 3749 interface(REG_INTER); 3750 %} 3751 3752 // Used in AtomicAdd 3753 operand eBXRegP(eRegP reg) %{ 3754 constraint(ALLOC_IN_RC(ebx_reg)); 3755 match(reg); 3756 format %{ "EBX" %} 3757 interface(REG_INTER); 3758 %} 3759 3760 // Tail-call (interprocedural jump) to interpreter 3761 operand eCXRegP(eRegP reg) %{ 3762 constraint(ALLOC_IN_RC(ecx_reg)); 3763 match(reg); 3764 format %{ "ECX" %} 3765 interface(REG_INTER); 3766 %} 3767 3768 operand eDXRegP(eRegP reg) %{ 3769 constraint(ALLOC_IN_RC(edx_reg)); 3770 match(reg); 3771 format %{ "EDX" %} 3772 interface(REG_INTER); 3773 %} 3774 3775 operand eSIRegP(eRegP reg) %{ 3776 constraint(ALLOC_IN_RC(esi_reg)); 3777 match(reg); 3778 format %{ "ESI" %} 3779 interface(REG_INTER); 3780 %} 3781 3782 // Used in rep stosw 3783 operand eDIRegP(eRegP reg) %{ 3784 constraint(ALLOC_IN_RC(edi_reg)); 3785 match(reg); 3786 format %{ "EDI" %} 3787 interface(REG_INTER); 3788 %} 3789 3790 operand eRegL() %{ 3791 constraint(ALLOC_IN_RC(long_reg)); 3792 match(RegL); 3793 match(eADXRegL); 3794 3795 format %{ %} 3796 interface(REG_INTER); 3797 %} 3798 3799 operand eADXRegL( eRegL reg 
) %{ 3800 constraint(ALLOC_IN_RC(eadx_reg)); 3801 match(reg); 3802 3803 format %{ "EDX:EAX" %} 3804 interface(REG_INTER); 3805 %} 3806 3807 operand eBCXRegL( eRegL reg ) %{ 3808 constraint(ALLOC_IN_RC(ebcx_reg)); 3809 match(reg); 3810 3811 format %{ "EBX:ECX" %} 3812 interface(REG_INTER); 3813 %} 3814 3815 operand eBDPRegL( eRegL reg ) %{ 3816 constraint(ALLOC_IN_RC(ebpd_reg)); 3817 match(reg); 3818 3819 format %{ "EBP:EDI" %} 3820 interface(REG_INTER); 3821 %} 3822 // Special case for integer high multiply 3823 operand eADXRegL_low_only() %{ 3824 constraint(ALLOC_IN_RC(eadx_reg)); 3825 match(RegL); 3826 3827 format %{ "EAX" %} 3828 interface(REG_INTER); 3829 %} 3830 3831 // Flags register, used as output of compare instructions 3832 operand rFlagsReg() %{ 3833 constraint(ALLOC_IN_RC(int_flags)); 3834 match(RegFlags); 3835 3836 format %{ "EFLAGS" %} 3837 interface(REG_INTER); 3838 %} 3839 3840 // Flags register, used as output of compare instructions 3841 operand eFlagsReg() %{ 3842 constraint(ALLOC_IN_RC(int_flags)); 3843 match(RegFlags); 3844 3845 format %{ "EFLAGS" %} 3846 interface(REG_INTER); 3847 %} 3848 3849 // Flags register, used as output of FLOATING POINT compare instructions 3850 operand eFlagsRegU() %{ 3851 constraint(ALLOC_IN_RC(int_flags)); 3852 match(RegFlags); 3853 3854 format %{ "EFLAGS_U" %} 3855 interface(REG_INTER); 3856 %} 3857 3858 operand eFlagsRegUCF() %{ 3859 constraint(ALLOC_IN_RC(int_flags)); 3860 match(RegFlags); 3861 predicate(false); 3862 3863 format %{ "EFLAGS_U_CF" %} 3864 interface(REG_INTER); 3865 %} 3866 3867 // Condition Code Register used by long compare 3868 operand flagsReg_long_LTGE() %{ 3869 constraint(ALLOC_IN_RC(int_flags)); 3870 match(RegFlags); 3871 format %{ "FLAGS_LTGE" %} 3872 interface(REG_INTER); 3873 %} 3874 operand flagsReg_long_EQNE() %{ 3875 constraint(ALLOC_IN_RC(int_flags)); 3876 match(RegFlags); 3877 format %{ "FLAGS_EQNE" %} 3878 interface(REG_INTER); 3879 %} 3880 operand flagsReg_long_LEGT() %{ 3881 
constraint(ALLOC_IN_RC(int_flags)); 3882 match(RegFlags); 3883 format %{ "FLAGS_LEGT" %} 3884 interface(REG_INTER); 3885 %} 3886 3887 // Condition Code Register used by unsigned long compare 3888 operand flagsReg_ulong_LTGE() %{ 3889 constraint(ALLOC_IN_RC(int_flags)); 3890 match(RegFlags); 3891 format %{ "FLAGS_U_LTGE" %} 3892 interface(REG_INTER); 3893 %} 3894 operand flagsReg_ulong_EQNE() %{ 3895 constraint(ALLOC_IN_RC(int_flags)); 3896 match(RegFlags); 3897 format %{ "FLAGS_U_EQNE" %} 3898 interface(REG_INTER); 3899 %} 3900 operand flagsReg_ulong_LEGT() %{ 3901 constraint(ALLOC_IN_RC(int_flags)); 3902 match(RegFlags); 3903 format %{ "FLAGS_U_LEGT" %} 3904 interface(REG_INTER); 3905 %} 3906 3907 // Float register operands 3908 operand regDPR() %{ 3909 predicate( UseSSE < 2 ); 3910 constraint(ALLOC_IN_RC(fp_dbl_reg)); 3911 match(RegD); 3912 match(regDPR1); 3913 match(regDPR2); 3914 format %{ %} 3915 interface(REG_INTER); 3916 %} 3917 3918 operand regDPR1(regDPR reg) %{ 3919 predicate( UseSSE < 2 ); 3920 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 3921 match(reg); 3922 format %{ "FPR1" %} 3923 interface(REG_INTER); 3924 %} 3925 3926 operand regDPR2(regDPR reg) %{ 3927 predicate( UseSSE < 2 ); 3928 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 3929 match(reg); 3930 format %{ "FPR2" %} 3931 interface(REG_INTER); 3932 %} 3933 3934 operand regnotDPR1(regDPR reg) %{ 3935 predicate( UseSSE < 2 ); 3936 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 3937 match(reg); 3938 format %{ %} 3939 interface(REG_INTER); 3940 %} 3941 3942 // Float register operands 3943 operand regFPR() %{ 3944 predicate( UseSSE < 2 ); 3945 constraint(ALLOC_IN_RC(fp_flt_reg)); 3946 match(RegF); 3947 match(regFPR1); 3948 format %{ %} 3949 interface(REG_INTER); 3950 %} 3951 3952 // Float register operands 3953 operand regFPR1(regFPR reg) %{ 3954 predicate( UseSSE < 2 ); 3955 constraint(ALLOC_IN_RC(fp_flt_reg0)); 3956 match(reg); 3957 format %{ "FPR1" %} 3958 interface(REG_INTER); 3959 %} 3960 3961 // XMM Float register 
operands 3962 operand regF() %{ 3963 predicate( UseSSE>=1 ); 3964 constraint(ALLOC_IN_RC(float_reg_legacy)); 3965 match(RegF); 3966 format %{ %} 3967 interface(REG_INTER); 3968 %} 3969 3970 operand legRegF() %{ 3971 predicate( UseSSE>=1 ); 3972 constraint(ALLOC_IN_RC(float_reg_legacy)); 3973 match(RegF); 3974 format %{ %} 3975 interface(REG_INTER); 3976 %} 3977 3978 // Float register operands 3979 operand vlRegF() %{ 3980 constraint(ALLOC_IN_RC(float_reg_vl)); 3981 match(RegF); 3982 3983 format %{ %} 3984 interface(REG_INTER); 3985 %} 3986 3987 // XMM Double register operands 3988 operand regD() %{ 3989 predicate( UseSSE>=2 ); 3990 constraint(ALLOC_IN_RC(double_reg_legacy)); 3991 match(RegD); 3992 format %{ %} 3993 interface(REG_INTER); 3994 %} 3995 3996 // Double register operands 3997 operand legRegD() %{ 3998 predicate( UseSSE>=2 ); 3999 constraint(ALLOC_IN_RC(double_reg_legacy)); 4000 match(RegD); 4001 format %{ %} 4002 interface(REG_INTER); 4003 %} 4004 4005 operand vlRegD() %{ 4006 constraint(ALLOC_IN_RC(double_reg_vl)); 4007 match(RegD); 4008 4009 format %{ %} 4010 interface(REG_INTER); 4011 %} 4012 4013 //----------Memory Operands---------------------------------------------------- 4014 // Direct Memory Operand 4015 operand direct(immP addr) %{ 4016 match(addr); 4017 4018 format %{ "[$addr]" %} 4019 interface(MEMORY_INTER) %{ 4020 base(0xFFFFFFFF); 4021 index(0x4); 4022 scale(0x0); 4023 disp($addr); 4024 %} 4025 %} 4026 4027 // Indirect Memory Operand 4028 operand indirect(eRegP reg) %{ 4029 constraint(ALLOC_IN_RC(int_reg)); 4030 match(reg); 4031 4032 format %{ "[$reg]" %} 4033 interface(MEMORY_INTER) %{ 4034 base($reg); 4035 index(0x4); 4036 scale(0x0); 4037 disp(0x0); 4038 %} 4039 %} 4040 4041 // Indirect Memory Plus Short Offset Operand 4042 operand indOffset8(eRegP reg, immI8 off) %{ 4043 match(AddP reg off); 4044 4045 format %{ "[$reg + $off]" %} 4046 interface(MEMORY_INTER) %{ 4047 base($reg); 4048 index(0x4); 4049 scale(0x0); 4050 disp($off); 4051 %} 
4052 %} 4053 4054 // Indirect Memory Plus Long Offset Operand 4055 operand indOffset32(eRegP reg, immI off) %{ 4056 match(AddP reg off); 4057 4058 format %{ "[$reg + $off]" %} 4059 interface(MEMORY_INTER) %{ 4060 base($reg); 4061 index(0x4); 4062 scale(0x0); 4063 disp($off); 4064 %} 4065 %} 4066 4067 // Indirect Memory Plus Long Offset Operand 4068 operand indOffset32X(rRegI reg, immP off) %{ 4069 match(AddP off reg); 4070 4071 format %{ "[$reg + $off]" %} 4072 interface(MEMORY_INTER) %{ 4073 base($reg); 4074 index(0x4); 4075 scale(0x0); 4076 disp($off); 4077 %} 4078 %} 4079 4080 // Indirect Memory Plus Index Register Plus Offset Operand 4081 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4082 match(AddP (AddP reg ireg) off); 4083 4084 op_cost(10); 4085 format %{"[$reg + $off + $ireg]" %} 4086 interface(MEMORY_INTER) %{ 4087 base($reg); 4088 index($ireg); 4089 scale(0x0); 4090 disp($off); 4091 %} 4092 %} 4093 4094 // Indirect Memory Plus Index Register Plus Offset Operand 4095 operand indIndex(eRegP reg, rRegI ireg) %{ 4096 match(AddP reg ireg); 4097 4098 op_cost(10); 4099 format %{"[$reg + $ireg]" %} 4100 interface(MEMORY_INTER) %{ 4101 base($reg); 4102 index($ireg); 4103 scale(0x0); 4104 disp(0x0); 4105 %} 4106 %} 4107 4108 // // ------------------------------------------------------------------------- 4109 // // 486 architecture doesn't support "scale * index + offset" with out a base 4110 // // ------------------------------------------------------------------------- 4111 // // Scaled Memory Operands 4112 // // Indirect Memory Times Scale Plus Offset Operand 4113 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4114 // match(AddP off (LShiftI ireg scale)); 4115 // 4116 // op_cost(10); 4117 // format %{"[$off + $ireg << $scale]" %} 4118 // interface(MEMORY_INTER) %{ 4119 // base(0x4); 4120 // index($ireg); 4121 // scale($scale); 4122 // disp($off); 4123 // %} 4124 // %} 4125 4126 // Indirect Memory Times Scale Plus Index Register 4127 
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long. If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address. Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.
4160 4161 // load-long support 4162 operand load_long_RegP() %{ 4163 constraint(ALLOC_IN_RC(esi_reg)); 4164 match(RegP); 4165 match(eSIRegP); 4166 op_cost(100); 4167 format %{ %} 4168 interface(REG_INTER); 4169 %} 4170 4171 // Indirect Memory Operand Long 4172 operand load_long_indirect(load_long_RegP reg) %{ 4173 constraint(ALLOC_IN_RC(esi_reg)); 4174 match(reg); 4175 4176 format %{ "[$reg]" %} 4177 interface(MEMORY_INTER) %{ 4178 base($reg); 4179 index(0x4); 4180 scale(0x0); 4181 disp(0x0); 4182 %} 4183 %} 4184 4185 // Indirect Memory Plus Long Offset Operand 4186 operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 4187 match(AddP reg off); 4188 4189 format %{ "[$reg + $off]" %} 4190 interface(MEMORY_INTER) %{ 4191 base($reg); 4192 index(0x4); 4193 scale(0x0); 4194 disp($off); 4195 %} 4196 %} 4197 4198 opclass load_long_memory(load_long_indirect, load_long_indOffset32); 4199 4200 4201 //----------Special Memory Operands-------------------------------------------- 4202 // Stack Slot Operand - This operand is used for loading and storing temporary 4203 // values on the stack where a match requires a value to 4204 // flow through memory. 
4205 operand stackSlotP(sRegP reg) %{ 4206 constraint(ALLOC_IN_RC(stack_slots)); 4207 // No match rule because this operand is only generated in matching 4208 format %{ "[$reg]" %} 4209 interface(MEMORY_INTER) %{ 4210 base(0x4); // ESP 4211 index(0x4); // No Index 4212 scale(0x0); // No Scale 4213 disp($reg); // Stack Offset 4214 %} 4215 %} 4216 4217 operand stackSlotI(sRegI reg) %{ 4218 constraint(ALLOC_IN_RC(stack_slots)); 4219 // No match rule because this operand is only generated in matching 4220 format %{ "[$reg]" %} 4221 interface(MEMORY_INTER) %{ 4222 base(0x4); // ESP 4223 index(0x4); // No Index 4224 scale(0x0); // No Scale 4225 disp($reg); // Stack Offset 4226 %} 4227 %} 4228 4229 operand stackSlotF(sRegF reg) %{ 4230 constraint(ALLOC_IN_RC(stack_slots)); 4231 // No match rule because this operand is only generated in matching 4232 format %{ "[$reg]" %} 4233 interface(MEMORY_INTER) %{ 4234 base(0x4); // ESP 4235 index(0x4); // No Index 4236 scale(0x0); // No Scale 4237 disp($reg); // Stack Offset 4238 %} 4239 %} 4240 4241 operand stackSlotD(sRegD reg) %{ 4242 constraint(ALLOC_IN_RC(stack_slots)); 4243 // No match rule because this operand is only generated in matching 4244 format %{ "[$reg]" %} 4245 interface(MEMORY_INTER) %{ 4246 base(0x4); // ESP 4247 index(0x4); // No Index 4248 scale(0x0); // No Scale 4249 disp($reg); // Stack Offset 4250 %} 4251 %} 4252 4253 operand stackSlotL(sRegL reg) %{ 4254 constraint(ALLOC_IN_RC(stack_slots)); 4255 // No match rule because this operand is only generated in matching 4256 format %{ "[$reg]" %} 4257 interface(MEMORY_INTER) %{ 4258 base(0x4); // ESP 4259 index(0x4); // No Index 4260 scale(0x0); // No Scale 4261 disp($reg); // Stack Offset 4262 %} 4263 %} 4264 4265 //----------Conditional Branch Operands---------------------------------------- 4266 // Comparison Op - This is the operation of the comparison, and is limited to 4267 // the following set of codes: 4268 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) 
4269 // 4270 // Other attributes of the comparison, such as unsignedness, are specified 4271 // by the comparison instruction that sets a condition code flags register. 4272 // That result is represented by a flags operand whose subtype is appropriate 4273 // to the unsignedness (etc.) of the comparison. 4274 // 4275 // Later, the instruction which matches both the Comparison Op (a Bool) and 4276 // the flags (produced by the Cmp) specifies the coding of the comparison op 4277 // by matching a specific subtype of Bool operand below, such as cmpOpU. 4278 4279 // Comparison Code 4280 operand cmpOp() %{ 4281 match(Bool); 4282 4283 format %{ "" %} 4284 interface(COND_INTER) %{ 4285 equal(0x4, "e"); 4286 not_equal(0x5, "ne"); 4287 less(0xC, "l"); 4288 greater_equal(0xD, "ge"); 4289 less_equal(0xE, "le"); 4290 greater(0xF, "g"); 4291 overflow(0x0, "o"); 4292 no_overflow(0x1, "no"); 4293 %} 4294 %} 4295 4296 // Comparison Code, unsigned compare. Used by FP also, with 4297 // C2 (unordered) turned into GT or LT already. The other bits 4298 // C0 and C3 are turned into Carry & Zero flags. 
4299 operand cmpOpU() %{ 4300 match(Bool); 4301 4302 format %{ "" %} 4303 interface(COND_INTER) %{ 4304 equal(0x4, "e"); 4305 not_equal(0x5, "ne"); 4306 less(0x2, "b"); 4307 greater_equal(0x3, "nb"); 4308 less_equal(0x6, "be"); 4309 greater(0x7, "nbe"); 4310 overflow(0x0, "o"); 4311 no_overflow(0x1, "no"); 4312 %} 4313 %} 4314 4315 // Floating comparisons that don't require any fixup for the unordered case 4316 operand cmpOpUCF() %{ 4317 match(Bool); 4318 predicate(n->as_Bool()->_test._test == BoolTest::lt || 4319 n->as_Bool()->_test._test == BoolTest::ge || 4320 n->as_Bool()->_test._test == BoolTest::le || 4321 n->as_Bool()->_test._test == BoolTest::gt); 4322 format %{ "" %} 4323 interface(COND_INTER) %{ 4324 equal(0x4, "e"); 4325 not_equal(0x5, "ne"); 4326 less(0x2, "b"); 4327 greater_equal(0x3, "nb"); 4328 less_equal(0x6, "be"); 4329 greater(0x7, "nbe"); 4330 overflow(0x0, "o"); 4331 no_overflow(0x1, "no"); 4332 %} 4333 %} 4334 4335 4336 // Floating comparisons that can be fixed up with extra conditional jumps 4337 operand cmpOpUCF2() %{ 4338 match(Bool); 4339 predicate(n->as_Bool()->_test._test == BoolTest::ne || 4340 n->as_Bool()->_test._test == BoolTest::eq); 4341 format %{ "" %} 4342 interface(COND_INTER) %{ 4343 equal(0x4, "e"); 4344 not_equal(0x5, "ne"); 4345 less(0x2, "b"); 4346 greater_equal(0x3, "nb"); 4347 less_equal(0x6, "be"); 4348 greater(0x7, "nbe"); 4349 overflow(0x0, "o"); 4350 no_overflow(0x1, "no"); 4351 %} 4352 %} 4353 4354 // Comparison Code for FP conditional move 4355 operand cmpOp_fcmov() %{ 4356 match(Bool); 4357 4358 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 4359 n->as_Bool()->_test._test != BoolTest::no_overflow); 4360 format %{ "" %} 4361 interface(COND_INTER) %{ 4362 equal (0x0C8); 4363 not_equal (0x1C8); 4364 less (0x0C0); 4365 greater_equal(0x1C0); 4366 less_equal (0x0D0); 4367 greater (0x1D0); 4368 overflow(0x0, "o"); // not really supported by the instruction 4369 no_overflow(0x1, "no"); // not really supported 
by the instruction 4370 %} 4371 %} 4372 4373 // Comparison Code used in long compares 4374 operand cmpOp_commute() %{ 4375 match(Bool); 4376 4377 format %{ "" %} 4378 interface(COND_INTER) %{ 4379 equal(0x4, "e"); 4380 not_equal(0x5, "ne"); 4381 less(0xF, "g"); 4382 greater_equal(0xE, "le"); 4383 less_equal(0xD, "ge"); 4384 greater(0xC, "l"); 4385 overflow(0x0, "o"); 4386 no_overflow(0x1, "no"); 4387 %} 4388 %} 4389 4390 // Comparison Code used in unsigned long compares 4391 operand cmpOpU_commute() %{ 4392 match(Bool); 4393 4394 format %{ "" %} 4395 interface(COND_INTER) %{ 4396 equal(0x4, "e"); 4397 not_equal(0x5, "ne"); 4398 less(0x7, "nbe"); 4399 greater_equal(0x6, "be"); 4400 less_equal(0x3, "nb"); 4401 greater(0x2, "b"); 4402 overflow(0x0, "o"); 4403 no_overflow(0x1, "no"); 4404 %} 4405 %} 4406 4407 //----------OPERAND CLASSES---------------------------------------------------- 4408 // Operand Classes are groups of operands that are used as to simplify 4409 // instruction definitions by not requiring the AD writer to specify separate 4410 // instructions for every form of operand when the instruction accepts 4411 // multiple operand types with the same basic encoding and format. The classic 4412 // case of this is memory operands. 4413 4414 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, 4415 indIndex, indIndexScale, indIndexScaleOffset); 4416 4417 // Long memory operations are encoded in 2 instructions and a +4 offset. 4418 // This means some kind of offset is always required and you cannot use 4419 // an oop as the offset (done when working on static globals). 4420 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset, 4421 indIndex, indIndexScale, indIndexScaleOffset); 4422 4423 4424 //----------PIPELINE----------------------------------------------------------- 4425 // Rules which define the behavior of the target architectures pipeline. 
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation using big decoder
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory
mem) %{ 4544 single_instruction; 4545 dst : S5(write); 4546 mem : S3(read); 4547 D0 : S0; // big decoder only 4548 ALU : S4; // any alu 4549 MEM : S3; // any mem 4550 %} 4551 4552 // Long ALU reg-mem operation 4553 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4554 instruction_count(2); 4555 dst : S5(write); 4556 mem : S3(read); 4557 D0 : S0(2); // big decoder only; twice 4558 ALU : S4(2); // any 2 alus 4559 MEM : S3(2); // both mems 4560 %} 4561 4562 // Integer mem operation (prefetch) 4563 pipe_class ialu_mem(memory mem) 4564 %{ 4565 single_instruction; 4566 mem : S3(read); 4567 D0 : S0; // big decoder only 4568 MEM : S3; // any mem 4569 %} 4570 4571 // Integer Store to Memory 4572 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4573 single_instruction; 4574 mem : S3(read); 4575 src : S5(read); 4576 D0 : S0; // big decoder only 4577 ALU : S4; // any alu 4578 MEM : S3; 4579 %} 4580 4581 // Long Store to Memory 4582 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4583 instruction_count(2); 4584 mem : S3(read); 4585 src : S5(read); 4586 D0 : S0(2); // big decoder only; twice 4587 ALU : S4(2); // any 2 alus 4588 MEM : S3(2); // Both mems 4589 %} 4590 4591 // Integer Store to Memory 4592 pipe_class ialu_mem_imm(memory mem) %{ 4593 single_instruction; 4594 mem : S3(read); 4595 D0 : S0; // big decoder only 4596 ALU : S4; // any alu 4597 MEM : S3; 4598 %} 4599 4600 // Integer ALU0 reg-reg operation 4601 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4602 single_instruction; 4603 dst : S4(write); 4604 src : S3(read); 4605 D0 : S0; // Big decoder only 4606 ALU0 : S3; // only alu0 4607 %} 4608 4609 // Integer ALU0 reg-mem operation 4610 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4611 single_instruction; 4612 dst : S5(write); 4613 mem : S3(read); 4614 D0 : S0; // big decoder only 4615 ALU0 : S4; // ALU0 only 4616 MEM : S3; // any mem 4617 %} 4618 4619 // Integer ALU reg-reg operation 4620 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4621 single_instruction; 4622 cr : S4(write); 4623 src1 : S3(read); 4624 src2 : S3(read); 4625 DECODE : S0; // any decoder 4626 ALU : S3; // any alu 4627 %} 4628 4629 // Integer ALU reg-imm operation 4630 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4631 single_instruction; 4632 cr : S4(write); 4633 src1 : S3(read); 4634 DECODE : S0; // any decoder 4635 ALU : S3; // any alu 4636 %} 4637 4638 // Integer ALU reg-mem operation 4639 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4640 single_instruction; 4641 cr : S4(write); 4642 src1 : S3(read); 4643 src2 : S3(read); 4644 D0 : S0; // big decoder only 4645 ALU : S4; // any alu 4646 MEM : S3; 4647 %} 4648 4649 // Conditional move reg-reg 4650 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4651 instruction_count(4); 4652 y : S4(read); 4653 q : S3(read); 4654 p : S3(read); 4655 DECODE : S0(4); // any decoder 4656 %} 4657 4658 // Conditional move reg-reg 4659 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4660 single_instruction; 4661 dst : S4(write); 4662 src : S3(read); 4663 cr : S3(read); 4664 DECODE : S0; // any decoder 4665 %} 4666 4667 // Conditional move reg-mem 4668 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4669 single_instruction; 4670 dst : S4(write); 4671 src : S3(read); 4672 cr : S3(read); 4673 DECODE : S0; // any decoder 4674 MEM : S3; 4675 %} 4676 4677 // Conditional move reg-reg long 4678 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4679 single_instruction; 4680 dst : S4(write); 4681 src : S3(read); 4682 cr : S3(read); 4683 DECODE : S0(2); // any 2 decoders 4684 %} 4685 4686 // Conditional move double reg-reg 4687 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4688 single_instruction; 4689 dst : S4(write); 4690 src : S3(read); 4691 cr : S3(read); 4692 DECODE : S0; // any decoder 4693 %} 4694 4695 // Float reg-reg operation 4696 pipe_class fpu_reg(regDPR 
dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-mem operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2); 4772 src : S5(read); 4773 mem : S3(read); 4774 DECODE : S0; // any decoder for FPU PUSH 4775 D0 : S1; // big decoder only 4776 FPU : S4; 4777 MEM : S3; // any mem 4778 %} 4779 4780 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4781 instruction_count(3); 4782 src1 : S3(read); 4783 src2 : S3(read); 4784 mem : S3(read); 4785 DECODE : S0(2); // any decoder for FPU PUSH 4786 D0 : S1; // big decoder only 4787 FPU : S4; 4788 MEM : S3; // any mem 4789 %} 4790 4791 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4792 instruction_count(3); 4793 src1 : S3(read); 4794 src2 : S3(read); 4795 mem : S4(read); 4796 DECODE : S0; // any decoder for FPU PUSH 4797 D0 : S0(2); // big decoder only 4798 FPU : S4; 4799 MEM : S3(2); // any mem 4800 %} 4801 4802 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4803 instruction_count(2); 4804 src1 : S3(read); 4805 dst : S4(read); 4806 D0 : S0(2); // big decoder only 4807 MEM : S3(2); // any mem 4808 %} 4809 4810 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4811 instruction_count(3); 4812 src1 : S3(read); 4813 src2 : S3(read); 4814 dst : S4(read); 4815 D0 : S0(3); // big decoder only 4816 FPU : S4; 4817 MEM : S3(3); // any mem 4818 %} 4819 4820 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4821 instruction_count(3); 4822 src1 : S4(read); 4823 mem : S4(read); 4824 DECODE : S0; // any decoder for FPU PUSH 4825 D0 : S0(2); // big decoder only 4826 FPU : S4; 4827 MEM : S3(2); // any mem 4828 %} 4829 4830 // Float load constant 4831 pipe_class fpu_reg_con(regDPR dst) %{ 4832 instruction_count(2); 4833 dst : S5(write); 4834 D0 : S0; // big decoder only for the load 4835 DECODE : S1; // any decoder for FPU POP 4836 FPU : S4; 4837 MEM : S3; // any mem 4838 %} 4839 4840 // Float load constant 4841 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 4842 instruction_count(3); 4843 dst : S5(write); 4844 src : S3(read); 4845 D0 : S0; // big decoder only for 
the load 4846 DECODE : S1(2); // any decoder for FPU POP 4847 FPU : S4; 4848 MEM : S3; // any mem 4849 %} 4850 4851 // UnConditional branch 4852 pipe_class pipe_jmp( label labl ) %{ 4853 single_instruction; 4854 BR : S3; 4855 %} 4856 4857 // Conditional branch 4858 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 4859 single_instruction; 4860 cr : S1(read); 4861 BR : S3; 4862 %} 4863 4864 // Allocation idiom 4865 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 4866 instruction_count(1); force_serialization; 4867 fixed_latency(6); 4868 heap_ptr : S3(read); 4869 DECODE : S0(3); 4870 D0 : S2; 4871 MEM : S3; 4872 ALU : S3(2); 4873 dst : S5(write); 4874 BR : S5; 4875 %} 4876 4877 // Generic big/slow expanded idiom 4878 pipe_class pipe_slow( ) %{ 4879 instruction_count(10); multiple_bundles; force_serialization; 4880 fixed_latency(100); 4881 D0 : S0(2); 4882 MEM : S3(2); 4883 %} 4884 4885 // The real do-nothing guy 4886 pipe_class empty( ) %{ 4887 instruction_count(0); 4888 %} 4889 4890 // Define the class for the Nop node 4891 define %{ 4892 MachNop = empty; 4893 %} 4894 4895 %} 4896 4897 //----------INSTRUCTIONS------------------------------------------------------- 4898 // 4899 // match -- States which machine-independent subtree may be replaced 4900 // by this instruction. 4901 // ins_cost -- The estimated cost of this instruction is used by instruction 4902 // selection to identify a minimum cost tree of machine 4903 // instructions that matches a tree of machine-independent 4904 // instructions. 4905 // format -- A string providing the disassembly for this instruction. 4906 // The value of an instruction's operand may be inserted 4907 // by referring to it with a '$' prefix. 4908 // opcode -- Three instruction opcodes may be provided. These are referred 4909 // to within an encode class as $primary, $secondary, and $tertiary 4910 // respectively. 
//             The primary opcode is commonly used to
//             indicate the type of machine instruction, while secondary
//             and tertiary are often used for prefix options or addressing
//             modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//             name must have been defined in an 'enc_class' specification
//             in the encode section of the architecture description.

// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// These nodes exist only so the matcher has something to select; they must
// never survive to code emission, hence the ShouldNotReachHere() bodies.

// Load Float
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}



// Load Double
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

//----------BSWAP-Instruction--------------------------------------------------
// In-place byte reversal; note dst is both input and output in the match rule.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// 64-bit reversal on a 32-bit register pair: swap bytes within each half,
// then exchange the halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Reverse the two low bytes, zero-extending into bits 31..16 (SHR clears them).
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Reverse the two low bytes, sign-extending into bits 31..16 (SAR copies the
// new sign bit down).
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when LZCNT is unavailable.  BSR gives the index of the highest set
// bit, so lzcnt(src) == 31 - bsr(src); computed below as -(bsr) + 31.  BSR
// leaves dst undefined and sets ZF when src == 0, so that case is patched with
// the sentinel -1, which yields the correct result 32 after the NEG/ADD.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
            "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

// 64-bit count via two 32-bit LZCNTs.  LZCNT sets CF when its source is all
// zeros (see Intel SDM), which is used here to detect an all-zero high word
// and fall through to counting in the low word plus 32.
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// BSR-based 64-bit fallback: find the highest set bit index in 0..63 (or the
// -1 sentinel when the whole value is zero), then 63 - index via NEG/ADD.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when TZCNT is unavailable.  BSF returns the lowest set bit index
// (== trailing-zero count) but leaves dst undefined and sets ZF for src == 0,
// so that case is patched to the defined result 32.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// 64-bit count via two 32-bit TZCNTs.  TZCNT sets CF when its source is all
// zeros (see Intel SDM): count the low word first, and only if it was all
// zero count the high word and add 32.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// BSF-based 64-bit fallback: search the low word, then the high word (+32);
// an all-zero value yields 32 + 32 == 64 via the MOV/ADD path.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
            "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// POPCNT directly from memory, saving a separate load.
instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// 64-bit popcount on 32-bit x86: count each half of the register pair and
// add.  TEMP dst/tmp keep the allocator from aliasing them with src.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Memory form: the two 32-bit halves live at $mem and $mem+4, so the
// addresses are rebuilt by hand with an adjusted displacement.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    // After movsbl the top 25 bits of dst.lo all equal the sign bit, so a
    // 7-bit arithmetic shift is enough to fill dst.hi with the sign.
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    // Zero-extension: the high word of an unsigned byte is always zero.
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    // Only the low 8 mask bits matter since movzbl already cleared the rest.
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
// Matches (short << 24) >> 24, i.e. narrowing to a signed byte; a single
// sign-extending byte load produces the same result.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    // After movswl the top 17 bits of dst.lo equal the sign bit, so a 15-bit
    // arithmetic shift fills dst.hi with the sign.
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// Matches (char << 24) >> 24, i.e. narrowing to a signed byte.
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// The 0xFF mask reduces the load to a single zero-extended byte load.
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    // Only the low 16 mask bits matter since movzwl already cleared the rest.
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    // Full sign extension: dst.hi = dst.lo >> 31 (all sign bits).
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
// A 31-bit mask forces the result non-negative, so the high word is zero.
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads: the halves live at $mem and $mem+4.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
// Atomic 64-bit load via the x87 FPU (FILD/FISTP are single 64-bit memory
// accesses); used when SSE2 is not available.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic 64-bit load via an XMM register (SSE2), landing in a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic 64-bit load via an XMM register, landing directly in a GPR pair:
// extract the low half with MOVD, shift the XMM right 32 bits, extract the
// high half.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 stack form, used when SSE2 is unavailable)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Variant chosen when clearing the upper XMM bits is not preferred.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 stack form, used when SSE is disabled)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address — one LEA variant per addressing-mode operand.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( SetInstMark, LdImmI(dst, src), ClearInstMark );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero — XOR is shorter than MOV imm32, but clobbers flags.
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);  /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);  /* + rd */
  ins_encode( SetInstMark, LdImmP(dst, src), ClearInstMark );
  ins_pipe( ialu_reg_fat );
%}

// 64-bit constant: one MOV immediate per 32-bit half.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
5940 instruct loadConFPR1(regFPR dst, immFPR1 con) %{ 5941 match(Set dst con); 5942 ins_cost(125); 5943 format %{ "FLD1 ST\n\t" 5944 "FSTP $dst" %} 5945 ins_encode %{ 5946 __ fld1(); 5947 __ fstp_d($dst$$reg); 5948 %} 5949 ins_pipe(fpu_reg_con); 5950 %} 5951 5952 // The instruction usage is guarded by predicate in operand immF(). 5953 instruct loadConF(regF dst, immF con) %{ 5954 match(Set dst con); 5955 ins_cost(125); 5956 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 5957 ins_encode %{ 5958 __ movflt($dst$$XMMRegister, $constantaddress($con)); 5959 %} 5960 ins_pipe(pipe_slow); 5961 %} 5962 5963 // The instruction usage is guarded by predicate in operand immF0(). 5964 instruct loadConF0(regF dst, immF0 src) %{ 5965 match(Set dst src); 5966 ins_cost(100); 5967 format %{ "XORPS $dst,$dst\t# float 0.0" %} 5968 ins_encode %{ 5969 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 5970 %} 5971 ins_pipe(pipe_slow); 5972 %} 5973 5974 // The instruction usage is guarded by predicate in operand immDPR(). 5975 instruct loadConDPR(regDPR dst, immDPR con) %{ 5976 match(Set dst con); 5977 ins_cost(125); 5978 5979 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 5980 "FSTP $dst" %} 5981 ins_encode %{ 5982 __ fld_d($constantaddress($con)); 5983 __ fstp_d($dst$$reg); 5984 %} 5985 ins_pipe(fpu_reg_con); 5986 %} 5987 5988 // The instruction usage is guarded by predicate in operand immDPR0(). 5989 instruct loadConDPR0(regDPR dst, immDPR0 con) %{ 5990 match(Set dst con); 5991 ins_cost(125); 5992 5993 format %{ "FLDZ ST\n\t" 5994 "FSTP $dst" %} 5995 ins_encode %{ 5996 __ fldz(); 5997 __ fstp_d($dst$$reg); 5998 %} 5999 ins_pipe(fpu_reg_con); 6000 %} 6001 6002 // The instruction usage is guarded by predicate in operand immDPR1(). 
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// Load double constant from the constant table into an XMM register (SSE2 path).
// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Load double 0.0 by zeroing the XMM register with XORPD.
// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load long from a stack slot as two 32-bit loads (lo then hi half).
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).

// Non-SSE fallback: no prefetch instruction is available, emit nothing.
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66); // 0x66 is the operand-size prefix for the 16-bit store
  ins_encode( SetInstMark, OpcS, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
// Non-atomic path: two 32-bit stores (lo, then hi at mem+4).
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( SetInstMark, OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long store via an XMM temporary (SSE2 path), source on stack.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long store via XMM temporaries (SSE2 path), source in a
// register pair; the two halves are packed with PUNPCKLDQ before the store.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SetInstMark, SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32( src ), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Double
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// Store Float
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// Matches a double-to-float conversion folded into the store.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

// Acquire barrier: no code emitted (ordering provided by the memory model).
instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Release barrier: no code emitted (ordering provided by the memory model).
instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full StoreLoad barrier; emitted via MacroAssembler::membar.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Volatile barrier elided when a preceding store already acts as one.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// Zero-cost: src and dst are both constrained to EAX, so no code is emitted.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
// Branch-based emulation for CPUs without CMOV (signed compare flags).
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Branch-based emulation for CPUs without CMOV (unsigned compare flags).
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// True CMOV, register-to-register (signed compare flags).
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// True CMOV, register-to-register (unsigned compare flags).
instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// UCF (unordered-compare flags) variant; expands to the unsigned form.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// UCF variant; expands to the unsigned memory form.
instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// UCF variant; expands to the unsigned pointer form.
instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
// x87 FCMOV of a double (unsigned/fcmov-style condition codes).
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
// x87 FCMOV of a float (unsigned/fcmov-style condition codes).
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed case: emulate with an inverted branch around an FPU register move.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed case: emulate with an inverted branch around an FPU register move.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
// Emulated with an inverted branch around a MOVSS.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
// Emulated with an inverted branch around a MOVSD.
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// UCF variant; expands to the unsigned form.
instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// UCF variant; expands to the unsigned form.
instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move: one CMOV per 32-bit half (signed compare flags).
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Long conditional move: one CMOV per 32-bit half (unsigned compare flags).
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// UCF variant; expands to the unsigned long form.
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Add immediate to register.
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of constant +1 strength-reduced to the one-byte INC (guarded by UseIncDec).
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /*  */
ins_encode( Opc_plus( primary, dst ) ); 6943 ins_pipe( ialu_reg ); 6944 %} 6945 6946 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ 6947 match(Set dst (AddI src0 src1)); 6948 ins_cost(110); 6949 6950 format %{ "LEA $dst,[$src0 + $src1]" %} 6951 opcode(0x8D); /* 0x8D /r */ 6952 ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark ); 6953 ins_pipe( ialu_reg_reg ); 6954 %} 6955 6956 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ 6957 match(Set dst (AddP src0 src1)); 6958 ins_cost(110); 6959 6960 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} 6961 opcode(0x8D); /* 0x8D /r */ 6962 ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark ); 6963 ins_pipe( ialu_reg_reg ); 6964 %} 6965 6966 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ 6967 predicate(UseIncDec); 6968 match(Set dst (AddI dst src)); 6969 effect(KILL cr); 6970 6971 size(1); 6972 format %{ "DEC $dst" %} 6973 opcode(0x48); /* */ 6974 ins_encode( Opc_plus( primary, dst ) ); 6975 ins_pipe( ialu_reg ); 6976 %} 6977 6978 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ 6979 match(Set dst (AddP dst src)); 6980 effect(KILL cr); 6981 6982 size(2); 6983 format %{ "ADD $dst,$src" %} 6984 opcode(0x03); 6985 ins_encode( OpcP, RegReg( dst, src) ); 6986 ins_pipe( ialu_reg_reg ); 6987 %} 6988 6989 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ 6990 match(Set dst (AddP dst src)); 6991 effect(KILL cr); 6992 6993 format %{ "ADD $dst,$src" %} 6994 opcode(0x81,0x00); /* Opcode 81 /0 id */ 6995 // ins_encode( RegImm( dst, src) ); 6996 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 6997 ins_pipe( ialu_reg ); 6998 %} 6999 7000 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7001 match(Set dst (AddI dst (LoadI src))); 7002 effect(KILL cr); 7003 7004 ins_cost(150); 7005 format %{ "ADD $dst,$src" %} 7006 opcode(0x03); 7007 ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark ); 7008 ins_pipe( ialu_reg_mem ); 
7009 %} 7010 7011 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7012 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7013 effect(KILL cr); 7014 7015 ins_cost(150); 7016 format %{ "ADD $dst,$src" %} 7017 opcode(0x01); /* Opcode 01 /r */ 7018 ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); 7019 ins_pipe( ialu_mem_reg ); 7020 %} 7021 7022 // Add Memory with Immediate 7023 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7024 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7025 effect(KILL cr); 7026 7027 ins_cost(125); 7028 format %{ "ADD $dst,$src" %} 7029 opcode(0x81); /* Opcode 81 /0 id */ 7030 ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32(src), ClearInstMark ); 7031 ins_pipe( ialu_mem_imm ); 7032 %} 7033 7034 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{ 7035 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7036 effect(KILL cr); 7037 7038 ins_cost(125); 7039 format %{ "INC $dst" %} 7040 opcode(0xFF); /* Opcode FF /0 */ 7041 ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,dst), ClearInstMark); 7042 ins_pipe( ialu_mem_imm ); 7043 %} 7044 7045 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7046 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7047 effect(KILL cr); 7048 7049 ins_cost(125); 7050 format %{ "DEC $dst" %} 7051 opcode(0xFF); /* Opcode FF /1 */ 7052 ins_encode( SetInstMark, OpcP, RMopc_Mem(0x01,dst), ClearInstMark); 7053 ins_pipe( ialu_mem_imm ); 7054 %} 7055 7056 7057 instruct checkCastPP( eRegP dst ) %{ 7058 match(Set dst (CheckCastPP dst)); 7059 7060 size(0); 7061 format %{ "#checkcastPP of $dst" %} 7062 ins_encode( /*empty encoding*/ ); 7063 ins_pipe( empty ); 7064 %} 7065 7066 instruct castPP( eRegP dst ) %{ 7067 match(Set dst (CastPP dst)); 7068 format %{ "#castPP of $dst" %} 7069 ins_encode( /*empty encoding*/ ); 7070 ins_pipe( empty ); 7071 %} 7072 7073 instruct castII( rRegI dst ) %{ 7074 match(Set dst (CastII dst)); 7075 format %{ "#castII 
of $dst" %} 7076 ins_encode( /*empty encoding*/ ); 7077 ins_cost(0); 7078 ins_pipe( empty ); 7079 %} 7080 7081 instruct castLL( eRegL dst ) %{ 7082 match(Set dst (CastLL dst)); 7083 format %{ "#castLL of $dst" %} 7084 ins_encode( /*empty encoding*/ ); 7085 ins_cost(0); 7086 ins_pipe( empty ); 7087 %} 7088 7089 instruct castFF( regF dst ) %{ 7090 predicate(UseSSE >= 1); 7091 match(Set dst (CastFF dst)); 7092 format %{ "#castFF of $dst" %} 7093 ins_encode( /*empty encoding*/ ); 7094 ins_cost(0); 7095 ins_pipe( empty ); 7096 %} 7097 7098 instruct castDD( regD dst ) %{ 7099 predicate(UseSSE >= 2); 7100 match(Set dst (CastDD dst)); 7101 format %{ "#castDD of $dst" %} 7102 ins_encode( /*empty encoding*/ ); 7103 ins_cost(0); 7104 ins_pipe( empty ); 7105 %} 7106 7107 instruct castFF_PR( regFPR dst ) %{ 7108 predicate(UseSSE < 1); 7109 match(Set dst (CastFF dst)); 7110 format %{ "#castFF of $dst" %} 7111 ins_encode( /*empty encoding*/ ); 7112 ins_cost(0); 7113 ins_pipe( empty ); 7114 %} 7115 7116 instruct castDD_PR( regDPR dst ) %{ 7117 predicate(UseSSE < 2); 7118 match(Set dst (CastDD dst)); 7119 format %{ "#castDD of $dst" %} 7120 ins_encode( /*empty encoding*/ ); 7121 ins_cost(0); 7122 ins_pipe( empty ); 7123 %} 7124 7125 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7126 7127 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7128 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7129 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval))); 7130 effect(KILL cr, KILL oldval); 7131 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7132 "MOV $res,0\n\t" 7133 "JNE,s fail\n\t" 7134 "MOV $res,1\n" 7135 "fail:" %} 7136 ins_encode( enc_cmpxchg8(mem_ptr), 7137 enc_flags_ne_to_boolean(res) ); 7138 ins_pipe( pipe_cmpxchg ); 7139 %} 7140 7141 instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, 
eCXRegP newval, eFlagsReg cr) %{ 7142 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 7143 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); 7144 effect(KILL cr, KILL oldval); 7145 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7146 "MOV $res,0\n\t" 7147 "JNE,s fail\n\t" 7148 "MOV $res,1\n" 7149 "fail:" %} 7150 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7151 ins_pipe( pipe_cmpxchg ); 7152 %} 7153 7154 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7155 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval))); 7156 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval))); 7157 effect(KILL cr, KILL oldval); 7158 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7159 "MOV $res,0\n\t" 7160 "JNE,s fail\n\t" 7161 "MOV $res,1\n" 7162 "fail:" %} 7163 ins_encode( enc_cmpxchgb(mem_ptr), 7164 enc_flags_ne_to_boolean(res) ); 7165 ins_pipe( pipe_cmpxchg ); 7166 %} 7167 7168 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7169 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval))); 7170 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval))); 7171 effect(KILL cr, KILL oldval); 7172 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7173 "MOV $res,0\n\t" 7174 "JNE,s fail\n\t" 7175 "MOV $res,1\n" 7176 "fail:" %} 7177 ins_encode( enc_cmpxchgw(mem_ptr), 7178 enc_flags_ne_to_boolean(res) ); 7179 ins_pipe( pipe_cmpxchg ); 7180 %} 7181 7182 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7183 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 7184 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval))); 7185 effect(KILL cr, KILL oldval); 7186 format %{ 
"CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7187 "MOV $res,0\n\t" 7188 "JNE,s fail\n\t" 7189 "MOV $res,1\n" 7190 "fail:" %} 7191 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7192 ins_pipe( pipe_cmpxchg ); 7193 %} 7194 7195 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7196 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval))); 7197 effect(KILL cr); 7198 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7199 ins_encode( enc_cmpxchg8(mem_ptr) ); 7200 ins_pipe( pipe_cmpxchg ); 7201 %} 7202 7203 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7204 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); 7205 effect(KILL cr); 7206 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7207 ins_encode( enc_cmpxchg(mem_ptr) ); 7208 ins_pipe( pipe_cmpxchg ); 7209 %} 7210 7211 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7212 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval))); 7213 effect(KILL cr); 7214 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7215 ins_encode( enc_cmpxchgb(mem_ptr) ); 7216 ins_pipe( pipe_cmpxchg ); 7217 %} 7218 7219 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7220 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval))); 7221 effect(KILL cr); 7222 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7223 ins_encode( enc_cmpxchgw(mem_ptr) ); 7224 ins_pipe( pipe_cmpxchg ); 7225 %} 7226 7227 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7228 match(Set oldval 
(CompareAndExchangeI mem_ptr (Binary oldval newval))); 7229 effect(KILL cr); 7230 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7231 ins_encode( enc_cmpxchg(mem_ptr) ); 7232 ins_pipe( pipe_cmpxchg ); 7233 %} 7234 7235 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7236 predicate(n->as_LoadStore()->result_not_used()); 7237 match(Set dummy (GetAndAddB mem add)); 7238 effect(KILL cr); 7239 format %{ "ADDB [$mem],$add" %} 7240 ins_encode %{ 7241 __ lock(); 7242 __ addb($mem$$Address, $add$$constant); 7243 %} 7244 ins_pipe( pipe_cmpxchg ); 7245 %} 7246 7247 // Important to match to xRegI: only 8-bit regs. 7248 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{ 7249 match(Set newval (GetAndAddB mem newval)); 7250 effect(KILL cr); 7251 format %{ "XADDB [$mem],$newval" %} 7252 ins_encode %{ 7253 __ lock(); 7254 __ xaddb($mem$$Address, $newval$$Register); 7255 %} 7256 ins_pipe( pipe_cmpxchg ); 7257 %} 7258 7259 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7260 predicate(n->as_LoadStore()->result_not_used()); 7261 match(Set dummy (GetAndAddS mem add)); 7262 effect(KILL cr); 7263 format %{ "ADDS [$mem],$add" %} 7264 ins_encode %{ 7265 __ lock(); 7266 __ addw($mem$$Address, $add$$constant); 7267 %} 7268 ins_pipe( pipe_cmpxchg ); 7269 %} 7270 7271 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{ 7272 match(Set newval (GetAndAddS mem newval)); 7273 effect(KILL cr); 7274 format %{ "XADDS [$mem],$newval" %} 7275 ins_encode %{ 7276 __ lock(); 7277 __ xaddw($mem$$Address, $newval$$Register); 7278 %} 7279 ins_pipe( pipe_cmpxchg ); 7280 %} 7281 7282 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7283 predicate(n->as_LoadStore()->result_not_used()); 7284 match(Set dummy (GetAndAddI mem add)); 7285 effect(KILL cr); 7286 format %{ "ADDL [$mem],$add" %} 7287 ins_encode %{ 7288 __ lock(); 7289 __ addl($mem$$Address, 
$add$$constant); 7290 %} 7291 ins_pipe( pipe_cmpxchg ); 7292 %} 7293 7294 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ 7295 match(Set newval (GetAndAddI mem newval)); 7296 effect(KILL cr); 7297 format %{ "XADDL [$mem],$newval" %} 7298 ins_encode %{ 7299 __ lock(); 7300 __ xaddl($mem$$Address, $newval$$Register); 7301 %} 7302 ins_pipe( pipe_cmpxchg ); 7303 %} 7304 7305 // Important to match to xRegI: only 8-bit regs. 7306 instruct xchgB( memory mem, xRegI newval) %{ 7307 match(Set newval (GetAndSetB mem newval)); 7308 format %{ "XCHGB $newval,[$mem]" %} 7309 ins_encode %{ 7310 __ xchgb($newval$$Register, $mem$$Address); 7311 %} 7312 ins_pipe( pipe_cmpxchg ); 7313 %} 7314 7315 instruct xchgS( memory mem, rRegI newval) %{ 7316 match(Set newval (GetAndSetS mem newval)); 7317 format %{ "XCHGW $newval,[$mem]" %} 7318 ins_encode %{ 7319 __ xchgw($newval$$Register, $mem$$Address); 7320 %} 7321 ins_pipe( pipe_cmpxchg ); 7322 %} 7323 7324 instruct xchgI( memory mem, rRegI newval) %{ 7325 match(Set newval (GetAndSetI mem newval)); 7326 format %{ "XCHGL $newval,[$mem]" %} 7327 ins_encode %{ 7328 __ xchgl($newval$$Register, $mem$$Address); 7329 %} 7330 ins_pipe( pipe_cmpxchg ); 7331 %} 7332 7333 instruct xchgP( memory mem, pRegP newval) %{ 7334 match(Set newval (GetAndSetP mem newval)); 7335 format %{ "XCHGL $newval,[$mem]" %} 7336 ins_encode %{ 7337 __ xchgl($newval$$Register, $mem$$Address); 7338 %} 7339 ins_pipe( pipe_cmpxchg ); 7340 %} 7341 7342 //----------Subtraction Instructions------------------------------------------- 7343 7344 // Integer Subtraction Instructions 7345 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7346 match(Set dst (SubI dst src)); 7347 effect(KILL cr); 7348 7349 size(2); 7350 format %{ "SUB $dst,$src" %} 7351 opcode(0x2B); 7352 ins_encode( OpcP, RegReg( dst, src) ); 7353 ins_pipe( ialu_reg_reg ); 7354 %} 7355 7356 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7357 match(Set dst (SubI dst src)); 7358 
effect(KILL cr); 7359 7360 format %{ "SUB $dst,$src" %} 7361 opcode(0x81,0x05); /* Opcode 81 /5 */ 7362 // ins_encode( RegImm( dst, src) ); 7363 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7364 ins_pipe( ialu_reg ); 7365 %} 7366 7367 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7368 match(Set dst (SubI dst (LoadI src))); 7369 effect(KILL cr); 7370 7371 ins_cost(150); 7372 format %{ "SUB $dst,$src" %} 7373 opcode(0x2B); 7374 ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark ); 7375 ins_pipe( ialu_reg_mem ); 7376 %} 7377 7378 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7379 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 7380 effect(KILL cr); 7381 7382 ins_cost(150); 7383 format %{ "SUB $dst,$src" %} 7384 opcode(0x29); /* Opcode 29 /r */ 7385 ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); 7386 ins_pipe( ialu_mem_reg ); 7387 %} 7388 7389 // Subtract from a pointer 7390 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{ 7391 match(Set dst (AddP dst (SubI zero src))); 7392 effect(KILL cr); 7393 7394 size(2); 7395 format %{ "SUB $dst,$src" %} 7396 opcode(0x2B); 7397 ins_encode( OpcP, RegReg( dst, src) ); 7398 ins_pipe( ialu_reg_reg ); 7399 %} 7400 7401 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{ 7402 match(Set dst (SubI zero dst)); 7403 effect(KILL cr); 7404 7405 size(2); 7406 format %{ "NEG $dst" %} 7407 opcode(0xF7,0x03); // Opcode F7 /3 7408 ins_encode( OpcP, RegOpc( dst ) ); 7409 ins_pipe( ialu_reg ); 7410 %} 7411 7412 //----------Multiplication/Division Instructions------------------------------- 7413 // Integer Multiplication Instructions 7414 // Multiply Register 7415 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7416 match(Set dst (MulI dst src)); 7417 effect(KILL cr); 7418 7419 size(3); 7420 ins_cost(300); 7421 format %{ "IMUL $dst,$src" %} 7422 opcode(0xAF, 0x0F); 7423 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 7424 ins_pipe( 
ialu_reg_reg_alu0 ); 7425 %} 7426 7427 // Multiply 32-bit Immediate 7428 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ 7429 match(Set dst (MulI src imm)); 7430 effect(KILL cr); 7431 7432 ins_cost(300); 7433 format %{ "IMUL $dst,$src,$imm" %} 7434 opcode(0x69); /* 69 /r id */ 7435 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 7436 ins_pipe( ialu_reg_reg_alu0 ); 7437 %} 7438 7439 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 7440 match(Set dst src); 7441 effect(KILL cr); 7442 7443 // Note that this is artificially increased to make it more expensive than loadConL 7444 ins_cost(250); 7445 format %{ "MOV EAX,$src\t// low word only" %} 7446 opcode(0xB8); 7447 ins_encode( LdImmL_Lo(dst, src) ); 7448 ins_pipe( ialu_reg_fat ); 7449 %} 7450 7451 // Multiply by 32-bit Immediate, taking the shifted high order results 7452 // (special case for shift by 32) 7453 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 7454 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7455 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7456 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7457 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7458 effect(USE src1, KILL cr); 7459 7460 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7461 ins_cost(0*100 + 1*400 - 150); 7462 format %{ "IMUL EDX:EAX,$src1" %} 7463 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7464 ins_pipe( pipe_slow ); 7465 %} 7466 7467 // Multiply by 32-bit Immediate, taking the shifted high order results 7468 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 7469 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7470 predicate( 
_kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7471 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7472 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7473 effect(USE src1, KILL cr); 7474 7475 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7476 ins_cost(1*100 + 1*400 - 150); 7477 format %{ "IMUL EDX:EAX,$src1\n\t" 7478 "SAR EDX,$cnt-32" %} 7479 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7480 ins_pipe( pipe_slow ); 7481 %} 7482 7483 // Multiply Memory 32-bit Immediate 7484 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ 7485 match(Set dst (MulI (LoadI src) imm)); 7486 effect(KILL cr); 7487 7488 ins_cost(300); 7489 format %{ "IMUL $dst,$src,$imm" %} 7490 opcode(0x69); /* 69 /r id */ 7491 ins_encode( SetInstMark, OpcSE(imm), RegMem( dst, src ), Con8or32( imm ), ClearInstMark ); 7492 ins_pipe( ialu_reg_mem_alu0 ); 7493 %} 7494 7495 // Multiply Memory 7496 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ 7497 match(Set dst (MulI dst (LoadI src))); 7498 effect(KILL cr); 7499 7500 ins_cost(350); 7501 format %{ "IMUL $dst,$src" %} 7502 opcode(0xAF, 0x0F); 7503 ins_encode( SetInstMark, OpcS, OpcP, RegMem( dst, src), ClearInstMark ); 7504 ins_pipe( ialu_reg_mem_alu0 ); 7505 %} 7506 7507 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr) 7508 %{ 7509 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3))); 7510 effect(KILL cr, KILL src2); 7511 7512 expand %{ mulI_eReg(dst, src1, cr); 7513 mulI_eReg(src2, src3, cr); 7514 addI_eReg(dst, src2, cr); %} 7515 %} 7516 7517 // Multiply Register Int to Long 7518 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 7519 // Basic Idea: long = (long)int * (long)int 7520 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 7521 effect(DEF dst, USE src, USE src1, KILL 
flags); 7522 7523 ins_cost(300); 7524 format %{ "IMUL $dst,$src1" %} 7525 7526 ins_encode( long_int_multiply( dst, src1 ) ); 7527 ins_pipe( ialu_reg_reg_alu0 ); 7528 %} 7529 7530 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 7531 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 7532 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 7533 effect(KILL flags); 7534 7535 ins_cost(300); 7536 format %{ "MUL $dst,$src1" %} 7537 7538 ins_encode( long_uint_multiply(dst, src1) ); 7539 ins_pipe( ialu_reg_reg_alu0 ); 7540 %} 7541 7542 // Multiply Register Long 7543 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7544 match(Set dst (MulL dst src)); 7545 effect(KILL cr, TEMP tmp); 7546 ins_cost(4*100+3*400); 7547 // Basic idea: lo(result) = lo(x_lo * y_lo) 7548 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 7549 format %{ "MOV $tmp,$src.lo\n\t" 7550 "IMUL $tmp,EDX\n\t" 7551 "MOV EDX,$src.hi\n\t" 7552 "IMUL EDX,EAX\n\t" 7553 "ADD $tmp,EDX\n\t" 7554 "MUL EDX:EAX,$src.lo\n\t" 7555 "ADD EDX,$tmp" %} 7556 ins_encode( long_multiply( dst, src, tmp ) ); 7557 ins_pipe( pipe_slow ); 7558 %} 7559 7560 // Multiply Register Long where the left operand's high 32 bits are zero 7561 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7562 predicate(is_operand_hi32_zero(n->in(1))); 7563 match(Set dst (MulL dst src)); 7564 effect(KILL cr, TEMP tmp); 7565 ins_cost(2*100+2*400); 7566 // Basic idea: lo(result) = lo(x_lo * y_lo) 7567 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 7568 format %{ "MOV $tmp,$src.hi\n\t" 7569 "IMUL $tmp,EAX\n\t" 7570 "MUL EDX:EAX,$src.lo\n\t" 7571 "ADD EDX,$tmp" %} 7572 ins_encode %{ 7573 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 7574 __ imull($tmp$$Register, rax); 7575 __ mull($src$$Register); 7576 __ addl(rdx, $tmp$$Register); 7577 %} 7578 ins_pipe( 
pipe_slow ); 7579 %} 7580 7581 // Multiply Register Long where the right operand's high 32 bits are zero 7582 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7583 predicate(is_operand_hi32_zero(n->in(2))); 7584 match(Set dst (MulL dst src)); 7585 effect(KILL cr, TEMP tmp); 7586 ins_cost(2*100+2*400); 7587 // Basic idea: lo(result) = lo(x_lo * y_lo) 7588 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 7589 format %{ "MOV $tmp,$src.lo\n\t" 7590 "IMUL $tmp,EDX\n\t" 7591 "MUL EDX:EAX,$src.lo\n\t" 7592 "ADD EDX,$tmp" %} 7593 ins_encode %{ 7594 __ movl($tmp$$Register, $src$$Register); 7595 __ imull($tmp$$Register, rdx); 7596 __ mull($src$$Register); 7597 __ addl(rdx, $tmp$$Register); 7598 %} 7599 ins_pipe( pipe_slow ); 7600 %} 7601 7602 // Multiply Register Long where the left and the right operands' high 32 bits are zero 7603 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ 7604 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); 7605 match(Set dst (MulL dst src)); 7606 effect(KILL cr); 7607 ins_cost(1*400); 7608 // Basic idea: lo(result) = lo(x_lo * y_lo) 7609 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 7610 format %{ "MUL EDX:EAX,$src.lo\n\t" %} 7611 ins_encode %{ 7612 __ mull($src$$Register); 7613 %} 7614 ins_pipe( pipe_slow ); 7615 %} 7616 7617 // Multiply Register Long by small constant 7618 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ 7619 match(Set dst (MulL dst src)); 7620 effect(KILL cr, TEMP tmp); 7621 ins_cost(2*100+2*400); 7622 size(12); 7623 // Basic idea: lo(result) = lo(src * EAX) 7624 // hi(result) = hi(src * EAX) + lo(src * EDX) 7625 format %{ "IMUL $tmp,EDX,$src\n\t" 7626 "MOV EDX,$src\n\t" 7627 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 7628 "ADD EDX,$tmp" %} 7629 ins_encode( long_multiply_con( dst, src, tmp ) ); 7630 ins_pipe( pipe_slow ); 7631 
%} 7632 7633 // Integer DIV with Register 7634 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7635 match(Set rax (DivI rax div)); 7636 effect(KILL rdx, KILL cr); 7637 size(26); 7638 ins_cost(30*100+10*100); 7639 format %{ "CMP EAX,0x80000000\n\t" 7640 "JNE,s normal\n\t" 7641 "XOR EDX,EDX\n\t" 7642 "CMP ECX,-1\n\t" 7643 "JE,s done\n" 7644 "normal: CDQ\n\t" 7645 "IDIV $div\n\t" 7646 "done:" %} 7647 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7648 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7649 ins_pipe( ialu_reg_reg_alu0 ); 7650 %} 7651 7652 // Divide Register Long 7653 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ 7654 match(Set dst (DivL src1 src2)); 7655 effect(CALL); 7656 ins_cost(10000); 7657 format %{ "PUSH $src1.hi\n\t" 7658 "PUSH $src1.lo\n\t" 7659 "PUSH $src2.hi\n\t" 7660 "PUSH $src2.lo\n\t" 7661 "CALL SharedRuntime::ldiv\n\t" 7662 "ADD ESP,16" %} 7663 ins_encode( long_div(src1,src2) ); 7664 ins_pipe( pipe_slow ); 7665 %} 7666 7667 // Integer DIVMOD with Register, both quotient and mod results 7668 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7669 match(DivModI rax div); 7670 effect(KILL cr); 7671 size(26); 7672 ins_cost(30*100+10*100); 7673 format %{ "CMP EAX,0x80000000\n\t" 7674 "JNE,s normal\n\t" 7675 "XOR EDX,EDX\n\t" 7676 "CMP ECX,-1\n\t" 7677 "JE,s done\n" 7678 "normal: CDQ\n\t" 7679 "IDIV $div\n\t" 7680 "done:" %} 7681 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7682 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7683 ins_pipe( pipe_slow ); 7684 %} 7685 7686 // Integer MOD with Register 7687 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ 7688 match(Set rdx (ModI rax div)); 7689 effect(KILL rax, KILL cr); 7690 7691 size(26); 7692 ins_cost(300); 7693 format %{ "CDQ\n\t" 7694 "IDIV $div" %} 7695 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7696 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7697 ins_pipe( ialu_reg_reg_alu0 ); 7698 %} 7699 7700 // Remainder Register Long 7701 
instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ 7702 match(Set dst (ModL src1 src2)); 7703 effect(CALL); 7704 ins_cost(10000); 7705 format %{ "PUSH $src1.hi\n\t" 7706 "PUSH $src1.lo\n\t" 7707 "PUSH $src2.hi\n\t" 7708 "PUSH $src2.lo\n\t" 7709 "CALL SharedRuntime::lrem\n\t" 7710 "ADD ESP,16" %} 7711 ins_encode( long_mod(src1,src2) ); 7712 ins_pipe( pipe_slow ); 7713 %} 7714 7715 // Divide Register Long (no special case since divisor != -1) 7716 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7717 match(Set dst (DivL dst imm)); 7718 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7719 ins_cost(1000); 7720 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" 7721 "XOR $tmp2,$tmp2\n\t" 7722 "CMP $tmp,EDX\n\t" 7723 "JA,s fast\n\t" 7724 "MOV $tmp2,EAX\n\t" 7725 "MOV EAX,EDX\n\t" 7726 "MOV EDX,0\n\t" 7727 "JLE,s pos\n\t" 7728 "LNEG EAX : $tmp2\n\t" 7729 "DIV $tmp # unsigned division\n\t" 7730 "XCHG EAX,$tmp2\n\t" 7731 "DIV $tmp\n\t" 7732 "LNEG $tmp2 : EAX\n\t" 7733 "JMP,s done\n" 7734 "pos:\n\t" 7735 "DIV $tmp\n\t" 7736 "XCHG EAX,$tmp2\n" 7737 "fast:\n\t" 7738 "DIV $tmp\n" 7739 "done:\n\t" 7740 "MOV EDX,$tmp2\n\t" 7741 "NEG EDX:EAX # if $imm < 0" %} 7742 ins_encode %{ 7743 int con = (int)$imm$$constant; 7744 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7745 int pcon = (con > 0) ? con : -con; 7746 Label Lfast, Lpos, Ldone; 7747 7748 __ movl($tmp$$Register, pcon); 7749 __ xorl($tmp2$$Register,$tmp2$$Register); 7750 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7751 __ jccb(Assembler::above, Lfast); // result fits into 32 bit 7752 7753 __ movl($tmp2$$Register, $dst$$Register); // save 7754 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7755 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7756 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7757 7758 // Negative dividend. 
7759 // convert value to positive to use unsigned division 7760 __ lneg($dst$$Register, $tmp2$$Register); 7761 __ divl($tmp$$Register); 7762 __ xchgl($dst$$Register, $tmp2$$Register); 7763 __ divl($tmp$$Register); 7764 // revert result back to negative 7765 __ lneg($tmp2$$Register, $dst$$Register); 7766 __ jmpb(Ldone); 7767 7768 __ bind(Lpos); 7769 __ divl($tmp$$Register); // Use unsigned division 7770 __ xchgl($dst$$Register, $tmp2$$Register); 7771 // Fallthrow for final divide, tmp2 has 32 bit hi result 7772 7773 __ bind(Lfast); 7774 // fast path: src is positive 7775 __ divl($tmp$$Register); // Use unsigned division 7776 7777 __ bind(Ldone); 7778 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7779 if (con < 0) { 7780 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7781 } 7782 %} 7783 ins_pipe( pipe_slow ); 7784 %} 7785 7786 // Remainder Register Long (remainder fit into 32 bits) 7787 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7788 match(Set dst (ModL dst imm)); 7789 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7790 ins_cost(1000); 7791 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7792 "CMP $tmp,EDX\n\t" 7793 "JA,s fast\n\t" 7794 "MOV $tmp2,EAX\n\t" 7795 "MOV EAX,EDX\n\t" 7796 "MOV EDX,0\n\t" 7797 "JLE,s pos\n\t" 7798 "LNEG EAX : $tmp2\n\t" 7799 "DIV $tmp # unsigned division\n\t" 7800 "MOV EAX,$tmp2\n\t" 7801 "DIV $tmp\n\t" 7802 "NEG EDX\n\t" 7803 "JMP,s done\n" 7804 "pos:\n\t" 7805 "DIV $tmp\n\t" 7806 "MOV EAX,$tmp2\n" 7807 "fast:\n\t" 7808 "DIV $tmp\n" 7809 "done:\n\t" 7810 "MOV EAX,EDX\n\t" 7811 "SAR EDX,31\n\t" %} 7812 ins_encode %{ 7813 int con = (int)$imm$$constant; 7814 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7815 int pcon = (con > 0) ? 
con : -con; 7816 Label Lfast, Lpos, Ldone; 7817 7818 __ movl($tmp$$Register, pcon); 7819 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7820 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7821 7822 __ movl($tmp2$$Register, $dst$$Register); // save 7823 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7824 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7825 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7826 7827 // Negative dividend. 7828 // convert value to positive to use unsigned division 7829 __ lneg($dst$$Register, $tmp2$$Register); 7830 __ divl($tmp$$Register); 7831 __ movl($dst$$Register, $tmp2$$Register); 7832 __ divl($tmp$$Register); 7833 // revert remainder back to negative 7834 __ negl(HIGH_FROM_LOW($dst$$Register)); 7835 __ jmpb(Ldone); 7836 7837 __ bind(Lpos); 7838 __ divl($tmp$$Register); 7839 __ movl($dst$$Register, $tmp2$$Register); 7840 7841 __ bind(Lfast); 7842 // fast path: src is positive 7843 __ divl($tmp$$Register); 7844 7845 __ bind(Ldone); 7846 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7847 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 7848 7849 %} 7850 ins_pipe( pipe_slow ); 7851 %} 7852 7853 // Integer Shift Instructions 7854 // Shift Left by one 7855 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 7856 match(Set dst (LShiftI dst shift)); 7857 effect(KILL cr); 7858 7859 size(2); 7860 format %{ "SHL $dst,$shift" %} 7861 opcode(0xD1, 0x4); /* D1 /4 */ 7862 ins_encode( OpcP, RegOpc( dst ) ); 7863 ins_pipe( ialu_reg ); 7864 %} 7865 7866 // Shift Left by 8-bit immediate 7867 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7868 match(Set dst (LShiftI dst shift)); 7869 effect(KILL cr); 7870 7871 size(3); 7872 format %{ "SHL $dst,$shift" %} 7873 opcode(0xC1, 0x4); /* C1 /4 ib */ 7874 ins_encode( RegOpcImm( dst, shift) ); 7875 ins_pipe( ialu_reg ); 7876 %} 7877 7878 // Shift Left by variable 7879 instruct 
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7880 match(Set dst (LShiftI dst shift)); 7881 effect(KILL cr); 7882 7883 size(2); 7884 format %{ "SHL $dst,$shift" %} 7885 opcode(0xD3, 0x4); /* D3 /4 */ 7886 ins_encode( OpcP, RegOpc( dst ) ); 7887 ins_pipe( ialu_reg_reg ); 7888 %} 7889 7890 // Arithmetic shift right by one 7891 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 7892 match(Set dst (RShiftI dst shift)); 7893 effect(KILL cr); 7894 7895 size(2); 7896 format %{ "SAR $dst,$shift" %} 7897 opcode(0xD1, 0x7); /* D1 /7 */ 7898 ins_encode( OpcP, RegOpc( dst ) ); 7899 ins_pipe( ialu_reg ); 7900 %} 7901 7902 // Arithmetic shift right by one 7903 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{ 7904 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7905 effect(KILL cr); 7906 format %{ "SAR $dst,$shift" %} 7907 opcode(0xD1, 0x7); /* D1 /7 */ 7908 ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary,dst), ClearInstMark ); 7909 ins_pipe( ialu_mem_imm ); 7910 %} 7911 7912 // Arithmetic Shift Right by 8-bit immediate 7913 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7914 match(Set dst (RShiftI dst shift)); 7915 effect(KILL cr); 7916 7917 size(3); 7918 format %{ "SAR $dst,$shift" %} 7919 opcode(0xC1, 0x7); /* C1 /7 ib */ 7920 ins_encode( RegOpcImm( dst, shift ) ); 7921 ins_pipe( ialu_mem_imm ); 7922 %} 7923 7924 // Arithmetic Shift Right by 8-bit immediate 7925 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 7926 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7927 effect(KILL cr); 7928 7929 format %{ "SAR $dst,$shift" %} 7930 opcode(0xC1, 0x7); /* C1 /7 ib */ 7931 ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary, dst ), Con8or32(shift), ClearInstMark ); 7932 ins_pipe( ialu_mem_imm ); 7933 %} 7934 7935 // Arithmetic Shift Right by variable 7936 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7937 match(Set dst (RShiftI dst shift)); 7938 effect(KILL cr); 7939 7940 
  // (continuation of sarI_eReg_CL: arithmetic shift right by the count in CL)
  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7);  /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5);  /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  // Matched shift pair collapses to a single sign-extending byte move.
  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
7987 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{ 7988 match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); 7989 7990 size(3); 7991 format %{ "MOVSX $dst,$src :16" %} 7992 ins_encode %{ 7993 __ movswl($dst$$Register, $src$$Register); 7994 %} 7995 ins_pipe(ialu_reg_reg); 7996 %} 7997 7998 7999 // Logical Shift Right by variable 8000 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8001 match(Set dst (URShiftI dst shift)); 8002 effect(KILL cr); 8003 8004 size(2); 8005 format %{ "SHR $dst,$shift" %} 8006 opcode(0xD3, 0x5); /* D3 /5 */ 8007 ins_encode( OpcP, RegOpc( dst ) ); 8008 ins_pipe( ialu_reg_reg ); 8009 %} 8010 8011 8012 //----------Logical Instructions----------------------------------------------- 8013 //----------Integer Logical Instructions--------------------------------------- 8014 // And Instructions 8015 // And Register with Register 8016 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8017 match(Set dst (AndI dst src)); 8018 effect(KILL cr); 8019 8020 size(2); 8021 format %{ "AND $dst,$src" %} 8022 opcode(0x23); 8023 ins_encode( OpcP, RegReg( dst, src) ); 8024 ins_pipe( ialu_reg_reg ); 8025 %} 8026 8027 // And Register with Immediate 8028 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8029 match(Set dst (AndI dst src)); 8030 effect(KILL cr); 8031 8032 format %{ "AND $dst,$src" %} 8033 opcode(0x81,0x04); /* Opcode 81 /4 */ 8034 // ins_encode( RegImm( dst, src) ); 8035 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8036 ins_pipe( ialu_reg ); 8037 %} 8038 8039 // And Register with Memory 8040 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8041 match(Set dst (AndI dst (LoadI src))); 8042 effect(KILL cr); 8043 8044 ins_cost(150); 8045 format %{ "AND $dst,$src" %} 8046 opcode(0x23); 8047 ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark ); 8048 ins_pipe( ialu_reg_mem ); 8049 %} 8050 8051 // And Memory with Register 8052 instruct andI_mem_eReg(memory dst, rRegI src, 
eFlagsReg cr) %{ 8053 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8054 effect(KILL cr); 8055 8056 ins_cost(150); 8057 format %{ "AND $dst,$src" %} 8058 opcode(0x21); /* Opcode 21 /r */ 8059 ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); 8060 ins_pipe( ialu_mem_reg ); 8061 %} 8062 8063 // And Memory with Immediate 8064 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8065 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8066 effect(KILL cr); 8067 8068 ins_cost(125); 8069 format %{ "AND $dst,$src" %} 8070 opcode(0x81, 0x4); /* Opcode 81 /4 id */ 8071 // ins_encode( MemImm( dst, src) ); 8072 ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark ); 8073 ins_pipe( ialu_mem_imm ); 8074 %} 8075 8076 // BMI1 instructions 8077 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ 8078 match(Set dst (AndI (XorI src1 minus_1) src2)); 8079 predicate(UseBMI1Instructions); 8080 effect(KILL cr); 8081 8082 format %{ "ANDNL $dst, $src1, $src2" %} 8083 8084 ins_encode %{ 8085 __ andnl($dst$$Register, $src1$$Register, $src2$$Register); 8086 %} 8087 ins_pipe(ialu_reg); 8088 %} 8089 8090 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ 8091 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) )); 8092 predicate(UseBMI1Instructions); 8093 effect(KILL cr); 8094 8095 ins_cost(125); 8096 format %{ "ANDNL $dst, $src1, $src2" %} 8097 8098 ins_encode %{ 8099 __ andnl($dst$$Register, $src1$$Register, $src2$$Address); 8100 %} 8101 ins_pipe(ialu_reg_mem); 8102 %} 8103 8104 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{ 8105 match(Set dst (AndI (SubI imm_zero src) src)); 8106 predicate(UseBMI1Instructions); 8107 effect(KILL cr); 8108 8109 format %{ "BLSIL $dst, $src" %} 8110 8111 ins_encode %{ 8112 __ blsil($dst$$Register, $src$$Register); 8113 %} 8114 ins_pipe(ialu_reg); 8115 %} 8116 
8117 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{ 8118 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); 8119 predicate(UseBMI1Instructions); 8120 effect(KILL cr); 8121 8122 ins_cost(125); 8123 format %{ "BLSIL $dst, $src" %} 8124 8125 ins_encode %{ 8126 __ blsil($dst$$Register, $src$$Address); 8127 %} 8128 ins_pipe(ialu_reg_mem); 8129 %} 8130 8131 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8132 %{ 8133 match(Set dst (XorI (AddI src minus_1) src)); 8134 predicate(UseBMI1Instructions); 8135 effect(KILL cr); 8136 8137 format %{ "BLSMSKL $dst, $src" %} 8138 8139 ins_encode %{ 8140 __ blsmskl($dst$$Register, $src$$Register); 8141 %} 8142 8143 ins_pipe(ialu_reg); 8144 %} 8145 8146 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8147 %{ 8148 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); 8149 predicate(UseBMI1Instructions); 8150 effect(KILL cr); 8151 8152 ins_cost(125); 8153 format %{ "BLSMSKL $dst, $src" %} 8154 8155 ins_encode %{ 8156 __ blsmskl($dst$$Register, $src$$Address); 8157 %} 8158 8159 ins_pipe(ialu_reg_mem); 8160 %} 8161 8162 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8163 %{ 8164 match(Set dst (AndI (AddI src minus_1) src) ); 8165 predicate(UseBMI1Instructions); 8166 effect(KILL cr); 8167 8168 format %{ "BLSRL $dst, $src" %} 8169 8170 ins_encode %{ 8171 __ blsrl($dst$$Register, $src$$Register); 8172 %} 8173 8174 ins_pipe(ialu_reg); 8175 %} 8176 8177 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8178 %{ 8179 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); 8180 predicate(UseBMI1Instructions); 8181 effect(KILL cr); 8182 8183 ins_cost(125); 8184 format %{ "BLSRL $dst, $src" %} 8185 8186 ins_encode %{ 8187 __ blsrl($dst$$Register, $src$$Address); 8188 %} 8189 8190 ins_pipe(ialu_reg_mem); 8191 %} 8192 8193 // Or Instructions 8194 // Or Register with 
Register 8195 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8196 match(Set dst (OrI dst src)); 8197 effect(KILL cr); 8198 8199 size(2); 8200 format %{ "OR $dst,$src" %} 8201 opcode(0x0B); 8202 ins_encode( OpcP, RegReg( dst, src) ); 8203 ins_pipe( ialu_reg_reg ); 8204 %} 8205 8206 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8207 match(Set dst (OrI dst (CastP2X src))); 8208 effect(KILL cr); 8209 8210 size(2); 8211 format %{ "OR $dst,$src" %} 8212 opcode(0x0B); 8213 ins_encode( OpcP, RegReg( dst, src) ); 8214 ins_pipe( ialu_reg_reg ); 8215 %} 8216 8217 8218 // Or Register with Immediate 8219 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8220 match(Set dst (OrI dst src)); 8221 effect(KILL cr); 8222 8223 format %{ "OR $dst,$src" %} 8224 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8225 // ins_encode( RegImm( dst, src) ); 8226 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8227 ins_pipe( ialu_reg ); 8228 %} 8229 8230 // Or Register with Memory 8231 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8232 match(Set dst (OrI dst (LoadI src))); 8233 effect(KILL cr); 8234 8235 ins_cost(150); 8236 format %{ "OR $dst,$src" %} 8237 opcode(0x0B); 8238 ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark ); 8239 ins_pipe( ialu_reg_mem ); 8240 %} 8241 8242 // Or Memory with Register 8243 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8244 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8245 effect(KILL cr); 8246 8247 ins_cost(150); 8248 format %{ "OR $dst,$src" %} 8249 opcode(0x09); /* Opcode 09 /r */ 8250 ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); 8251 ins_pipe( ialu_mem_reg ); 8252 %} 8253 8254 // Or Memory with Immediate 8255 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8256 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8257 effect(KILL cr); 8258 8259 ins_cost(125); 8260 format %{ "OR $dst,$src" %} 8261 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8262 
// ins_encode( MemImm( dst, src) ); 8263 ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark ); 8264 ins_pipe( ialu_mem_imm ); 8265 %} 8266 8267 // ROL/ROR 8268 // ROL expand 8269 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8270 effect(USE_DEF dst, USE shift, KILL cr); 8271 8272 format %{ "ROL $dst, $shift" %} 8273 opcode(0xD1, 0x0); /* Opcode D1 /0 */ 8274 ins_encode( OpcP, RegOpc( dst )); 8275 ins_pipe( ialu_reg ); 8276 %} 8277 8278 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8279 effect(USE_DEF dst, USE shift, KILL cr); 8280 8281 format %{ "ROL $dst, $shift" %} 8282 opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 8283 ins_encode( RegOpcImm(dst, shift) ); 8284 ins_pipe(ialu_reg); 8285 %} 8286 8287 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8288 effect(USE_DEF dst, USE shift, KILL cr); 8289 8290 format %{ "ROL $dst, $shift" %} 8291 opcode(0xD3, 0x0); /* Opcode D3 /0 */ 8292 ins_encode(OpcP, RegOpc(dst)); 8293 ins_pipe( ialu_reg_reg ); 8294 %} 8295 // end of ROL expand 8296 8297 // ROL 32bit by one once 8298 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 8299 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8300 8301 expand %{ 8302 rolI_eReg_imm1(dst, lshift, cr); 8303 %} 8304 %} 8305 8306 // ROL 32bit var by imm8 once 8307 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 8308 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8309 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8310 8311 expand %{ 8312 rolI_eReg_imm8(dst, lshift, cr); 8313 %} 8314 %} 8315 8316 // ROL 32bit var by var once 8317 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ 8318 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 8319 8320 expand %{ 8321 rolI_eReg_CL(dst, shift, cr); 8322 %} 8323 %} 8324 8325 
// ROL 32bit var by var once 8326 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8327 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift)))); 8328 8329 expand %{ 8330 rolI_eReg_CL(dst, shift, cr); 8331 %} 8332 %} 8333 8334 // ROR expand 8335 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8336 effect(USE_DEF dst, USE shift, KILL cr); 8337 8338 format %{ "ROR $dst, $shift" %} 8339 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8340 ins_encode( OpcP, RegOpc( dst ) ); 8341 ins_pipe( ialu_reg ); 8342 %} 8343 8344 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8345 effect (USE_DEF dst, USE shift, KILL cr); 8346 8347 format %{ "ROR $dst, $shift" %} 8348 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8349 ins_encode( RegOpcImm(dst, shift) ); 8350 ins_pipe( ialu_reg ); 8351 %} 8352 8353 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8354 effect(USE_DEF dst, USE shift, KILL cr); 8355 8356 format %{ "ROR $dst, $shift" %} 8357 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8358 ins_encode(OpcP, RegOpc(dst)); 8359 ins_pipe( ialu_reg_reg ); 8360 %} 8361 // end of ROR expand 8362 8363 // ROR right once 8364 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8365 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8366 8367 expand %{ 8368 rorI_eReg_imm1(dst, rshift, cr); 8369 %} 8370 %} 8371 8372 // ROR 32bit by immI8 once 8373 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8374 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8375 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8376 8377 expand %{ 8378 rorI_eReg_imm8(dst, rshift, cr); 8379 %} 8380 %} 8381 8382 // ROR 32bit var by var once 8383 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ 8384 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8385 8386 
expand %{ 8387 rorI_eReg_CL(dst, shift, cr); 8388 %} 8389 %} 8390 8391 // ROR 32bit var by var once 8392 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8393 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8394 8395 expand %{ 8396 rorI_eReg_CL(dst, shift, cr); 8397 %} 8398 %} 8399 8400 // Xor Instructions 8401 // Xor Register with Register 8402 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8403 match(Set dst (XorI dst src)); 8404 effect(KILL cr); 8405 8406 size(2); 8407 format %{ "XOR $dst,$src" %} 8408 opcode(0x33); 8409 ins_encode( OpcP, RegReg( dst, src) ); 8410 ins_pipe( ialu_reg_reg ); 8411 %} 8412 8413 // Xor Register with Immediate -1 8414 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8415 match(Set dst (XorI dst imm)); 8416 8417 size(2); 8418 format %{ "NOT $dst" %} 8419 ins_encode %{ 8420 __ notl($dst$$Register); 8421 %} 8422 ins_pipe( ialu_reg ); 8423 %} 8424 8425 // Xor Register with Immediate 8426 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8427 match(Set dst (XorI dst src)); 8428 effect(KILL cr); 8429 8430 format %{ "XOR $dst,$src" %} 8431 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8432 // ins_encode( RegImm( dst, src) ); 8433 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8434 ins_pipe( ialu_reg ); 8435 %} 8436 8437 // Xor Register with Memory 8438 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8439 match(Set dst (XorI dst (LoadI src))); 8440 effect(KILL cr); 8441 8442 ins_cost(150); 8443 format %{ "XOR $dst,$src" %} 8444 opcode(0x33); 8445 ins_encode( SetInstMark, OpcP, RegMem(dst, src), ClearInstMark ); 8446 ins_pipe( ialu_reg_mem ); 8447 %} 8448 8449 // Xor Memory with Register 8450 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8451 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8452 effect(KILL cr); 8453 8454 ins_cost(150); 8455 format %{ "XOR $dst,$src" %} 8456 opcode(0x31); /* Opcode 31 /r */ 8457 ins_encode( 
SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); 8458 ins_pipe( ialu_mem_reg ); 8459 %} 8460 8461 // Xor Memory with Immediate 8462 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8463 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8464 effect(KILL cr); 8465 8466 ins_cost(125); 8467 format %{ "XOR $dst,$src" %} 8468 opcode(0x81,0x6); /* Opcode 81 /6 id */ 8469 ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark ); 8470 ins_pipe( ialu_mem_imm ); 8471 %} 8472 8473 //----------Convert Int to Boolean--------------------------------------------- 8474 8475 instruct movI_nocopy(rRegI dst, rRegI src) %{ 8476 effect( DEF dst, USE src ); 8477 format %{ "MOV $dst,$src" %} 8478 ins_encode( enc_Copy( dst, src) ); 8479 ins_pipe( ialu_reg_reg ); 8480 %} 8481 8482 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8483 effect( USE_DEF dst, USE src, KILL cr ); 8484 8485 size(4); 8486 format %{ "NEG $dst\n\t" 8487 "ADC $dst,$src" %} 8488 ins_encode( neg_reg(dst), 8489 OpcRegReg(0x13,dst,src) ); 8490 ins_pipe( ialu_reg_reg_long ); 8491 %} 8492 8493 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8494 match(Set dst (Conv2B src)); 8495 8496 expand %{ 8497 movI_nocopy(dst,src); 8498 ci2b(dst,src,cr); 8499 %} 8500 %} 8501 8502 instruct movP_nocopy(rRegI dst, eRegP src) %{ 8503 effect( DEF dst, USE src ); 8504 format %{ "MOV $dst,$src" %} 8505 ins_encode( enc_Copy( dst, src) ); 8506 ins_pipe( ialu_reg_reg ); 8507 %} 8508 8509 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8510 effect( USE_DEF dst, USE src, KILL cr ); 8511 format %{ "NEG $dst\n\t" 8512 "ADC $dst,$src" %} 8513 ins_encode( neg_reg(dst), 8514 OpcRegReg(0x13,dst,src) ); 8515 ins_pipe( ialu_reg_reg_long ); 8516 %} 8517 8518 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8519 match(Set dst (Conv2B src)); 8520 8521 expand %{ 8522 movP_nocopy(dst,src); 8523 cp2b(dst,src,cr); 8524 %} 8525 %} 8526 8527 instruct cmpLTMask(eCXRegI 
dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // Branchless mask: SETcc materializes (p < q) as 0/1, NEG turns 1
    // into all-ones.  (An unused 'Label done;' has been removed here.)
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// CmpLTMask of a value against zero: the sign-spread SAR is enough.
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    // p -= q; only add y back when the subtraction went negative.
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
8602 __ cmpl(Rp, Rq); 8603 __ jccb(Assembler::less, done); 8604 __ xorl(Ry, Ry); 8605 __ bind(done); 8606 %} 8607 8608 ins_pipe(pipe_cmplt); 8609 %} 8610 8611 /* If I enable this, I encourage spilling in the inner loop of compress. 8612 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ 8613 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); 8614 */ 8615 //----------Overflow Math Instructions----------------------------------------- 8616 8617 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8618 %{ 8619 match(Set cr (OverflowAddI op1 op2)); 8620 effect(DEF cr, USE_KILL op1, USE op2); 8621 8622 format %{ "ADD $op1, $op2\t# overflow check int" %} 8623 8624 ins_encode %{ 8625 __ addl($op1$$Register, $op2$$Register); 8626 %} 8627 ins_pipe(ialu_reg_reg); 8628 %} 8629 8630 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) 8631 %{ 8632 match(Set cr (OverflowAddI op1 op2)); 8633 effect(DEF cr, USE_KILL op1, USE op2); 8634 8635 format %{ "ADD $op1, $op2\t# overflow check int" %} 8636 8637 ins_encode %{ 8638 __ addl($op1$$Register, $op2$$constant); 8639 %} 8640 ins_pipe(ialu_reg_reg); 8641 %} 8642 8643 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) 8644 %{ 8645 match(Set cr (OverflowSubI op1 op2)); 8646 8647 format %{ "CMP $op1, $op2\t# overflow check int" %} 8648 ins_encode %{ 8649 __ cmpl($op1$$Register, $op2$$Register); 8650 %} 8651 ins_pipe(ialu_reg_reg); 8652 %} 8653 8654 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) 8655 %{ 8656 match(Set cr (OverflowSubI op1 op2)); 8657 8658 format %{ "CMP $op1, $op2\t# overflow check int" %} 8659 ins_encode %{ 8660 __ cmpl($op1$$Register, $op2$$constant); 8661 %} 8662 ins_pipe(ialu_reg_reg); 8663 %} 8664 8665 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2) 8666 %{ 8667 match(Set cr (OverflowSubI zero op2)); 8668 effect(DEF cr, USE_KILL op2); 8669 8670 format %{ "NEG $op2\t# overflow check int" 
  %}
  ins_encode %{
    // NEG sets OF when the operand is INT_MIN (the only overflowing case).
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    // Two-operand IMUL sets OF/CF on signed overflow; op1 is clobbered.
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    // Three-operand IMUL writes to tmp, preserving op1.
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Integer Absolute Instructions
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
  %}
  ins_encode %{
    // Branchless abs: tmp = sign mask of src; dst = (src ^ mask) - mask.
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);
    __ subl($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

//----------Long Instructions------------------------------------------------
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  // 64-bit add on 32-bit x86: low halves with ADD, high halves with ADC
  // to propagate the carry.
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct
addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8738 match(Set dst (AddL dst src)); 8739 effect(KILL cr); 8740 format %{ "ADD $dst.lo,$src.lo\n\t" 8741 "ADC $dst.hi,$src.hi" %} 8742 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */ 8743 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8744 ins_pipe( ialu_reg_long ); 8745 %} 8746 8747 // Add Long Register with Memory 8748 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8749 match(Set dst (AddL dst (LoadL mem))); 8750 effect(KILL cr); 8751 ins_cost(125); 8752 format %{ "ADD $dst.lo,$mem\n\t" 8753 "ADC $dst.hi,$mem+4" %} 8754 opcode(0x03, 0x13); 8755 ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark ); 8756 ins_pipe( ialu_reg_long_mem ); 8757 %} 8758 8759 // Subtract Long Register with Register. 8760 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8761 match(Set dst (SubL dst src)); 8762 effect(KILL cr); 8763 ins_cost(200); 8764 format %{ "SUB $dst.lo,$src.lo\n\t" 8765 "SBB $dst.hi,$src.hi" %} 8766 opcode(0x2B, 0x1B); 8767 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8768 ins_pipe( ialu_reg_reg_long ); 8769 %} 8770 8771 // Subtract Long Register with Immediate 8772 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8773 match(Set dst (SubL dst src)); 8774 effect(KILL cr); 8775 format %{ "SUB $dst.lo,$src.lo\n\t" 8776 "SBB $dst.hi,$src.hi" %} 8777 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8778 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8779 ins_pipe( ialu_reg_long ); 8780 %} 8781 8782 // Subtract Long Register with Memory 8783 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8784 match(Set dst (SubL dst (LoadL mem))); 8785 effect(KILL cr); 8786 ins_cost(125); 8787 format %{ "SUB $dst.lo,$mem\n\t" 8788 "SBB $dst.hi,$mem+4" %} 8789 opcode(0x2B, 0x1B); 8790 ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), 
ClearInstMark ); 8791 ins_pipe( ialu_reg_long_mem ); 8792 %} 8793 8794 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8795 match(Set dst (SubL zero dst)); 8796 effect(KILL cr); 8797 ins_cost(300); 8798 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8799 ins_encode( neg_long(dst) ); 8800 ins_pipe( ialu_reg_reg_long ); 8801 %} 8802 8803 // And Long Register with Register 8804 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8805 match(Set dst (AndL dst src)); 8806 effect(KILL cr); 8807 format %{ "AND $dst.lo,$src.lo\n\t" 8808 "AND $dst.hi,$src.hi" %} 8809 opcode(0x23,0x23); 8810 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8811 ins_pipe( ialu_reg_reg_long ); 8812 %} 8813 8814 // And Long Register with Immediate 8815 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8816 match(Set dst (AndL dst src)); 8817 effect(KILL cr); 8818 format %{ "AND $dst.lo,$src.lo\n\t" 8819 "AND $dst.hi,$src.hi" %} 8820 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8821 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8822 ins_pipe( ialu_reg_long ); 8823 %} 8824 8825 // And Long Register with Memory 8826 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8827 match(Set dst (AndL dst (LoadL mem))); 8828 effect(KILL cr); 8829 ins_cost(125); 8830 format %{ "AND $dst.lo,$mem\n\t" 8831 "AND $dst.hi,$mem+4" %} 8832 opcode(0x23, 0x23); 8833 ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark ); 8834 ins_pipe( ialu_reg_long_mem ); 8835 %} 8836 8837 // BMI1 instructions 8838 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 8839 match(Set dst (AndL (XorL src1 minus_1) src2)); 8840 predicate(UseBMI1Instructions); 8841 effect(KILL cr, TEMP dst); 8842 8843 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 8844 "ANDNL $dst.hi, $src1.hi, $src2.hi" 8845 %} 8846 8847 ins_encode %{ 8848 Register Rdst = $dst$$Register; 
8849 Register Rsrc1 = $src1$$Register; 8850 Register Rsrc2 = $src2$$Register; 8851 __ andnl(Rdst, Rsrc1, Rsrc2); 8852 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 8853 %} 8854 ins_pipe(ialu_reg_reg_long); 8855 %} 8856 8857 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ 8858 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 8859 predicate(UseBMI1Instructions); 8860 effect(KILL cr, TEMP dst); 8861 8862 ins_cost(125); 8863 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 8864 "ANDNL $dst.hi, $src1.hi, $src2+4" 8865 %} 8866 8867 ins_encode %{ 8868 Register Rdst = $dst$$Register; 8869 Register Rsrc1 = $src1$$Register; 8870 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 8871 8872 __ andnl(Rdst, Rsrc1, $src2$$Address); 8873 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); 8874 %} 8875 ins_pipe(ialu_reg_mem); 8876 %} 8877 8878 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 8879 match(Set dst (AndL (SubL imm_zero src) src)); 8880 predicate(UseBMI1Instructions); 8881 effect(KILL cr, TEMP dst); 8882 8883 format %{ "MOVL $dst.hi, 0\n\t" 8884 "BLSIL $dst.lo, $src.lo\n\t" 8885 "JNZ done\n\t" 8886 "BLSIL $dst.hi, $src.hi\n" 8887 "done:" 8888 %} 8889 8890 ins_encode %{ 8891 Label done; 8892 Register Rdst = $dst$$Register; 8893 Register Rsrc = $src$$Register; 8894 __ movl(HIGH_FROM_LOW(Rdst), 0); 8895 __ blsil(Rdst, Rsrc); 8896 __ jccb(Assembler::notZero, done); 8897 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8898 __ bind(done); 8899 %} 8900 ins_pipe(ialu_reg); 8901 %} 8902 8903 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 8904 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 8905 predicate(UseBMI1Instructions); 8906 effect(KILL cr, TEMP dst); 8907 8908 ins_cost(125); 8909 format %{ "MOVL $dst.hi, 0\n\t" 8910 "BLSIL $dst.lo, 
$src\n\t" 8911 "JNZ done\n\t" 8912 "BLSIL $dst.hi, $src+4\n" 8913 "done:" 8914 %} 8915 8916 ins_encode %{ 8917 Label done; 8918 Register Rdst = $dst$$Register; 8919 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8920 8921 __ movl(HIGH_FROM_LOW(Rdst), 0); 8922 __ blsil(Rdst, $src$$Address); 8923 __ jccb(Assembler::notZero, done); 8924 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 8925 __ bind(done); 8926 %} 8927 ins_pipe(ialu_reg_mem); 8928 %} 8929 8930 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8931 %{ 8932 match(Set dst (XorL (AddL src minus_1) src)); 8933 predicate(UseBMI1Instructions); 8934 effect(KILL cr, TEMP dst); 8935 8936 format %{ "MOVL $dst.hi, 0\n\t" 8937 "BLSMSKL $dst.lo, $src.lo\n\t" 8938 "JNC done\n\t" 8939 "BLSMSKL $dst.hi, $src.hi\n" 8940 "done:" 8941 %} 8942 8943 ins_encode %{ 8944 Label done; 8945 Register Rdst = $dst$$Register; 8946 Register Rsrc = $src$$Register; 8947 __ movl(HIGH_FROM_LOW(Rdst), 0); 8948 __ blsmskl(Rdst, Rsrc); 8949 __ jccb(Assembler::carryClear, done); 8950 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8951 __ bind(done); 8952 %} 8953 8954 ins_pipe(ialu_reg); 8955 %} 8956 8957 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8958 %{ 8959 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 8960 predicate(UseBMI1Instructions); 8961 effect(KILL cr, TEMP dst); 8962 8963 ins_cost(125); 8964 format %{ "MOVL $dst.hi, 0\n\t" 8965 "BLSMSKL $dst.lo, $src\n\t" 8966 "JNC done\n\t" 8967 "BLSMSKL $dst.hi, $src+4\n" 8968 "done:" 8969 %} 8970 8971 ins_encode %{ 8972 Label done; 8973 Register Rdst = $dst$$Register; 8974 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8975 8976 __ movl(HIGH_FROM_LOW(Rdst), 0); 8977 __ blsmskl(Rdst, $src$$Address); 8978 __ jccb(Assembler::carryClear, done); 8979 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 8980 __ 
bind(done); 8981 %} 8982 8983 ins_pipe(ialu_reg_mem); 8984 %} 8985 8986 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8987 %{ 8988 match(Set dst (AndL (AddL src minus_1) src) ); 8989 predicate(UseBMI1Instructions); 8990 effect(KILL cr, TEMP dst); 8991 8992 format %{ "MOVL $dst.hi, $src.hi\n\t" 8993 "BLSRL $dst.lo, $src.lo\n\t" 8994 "JNC done\n\t" 8995 "BLSRL $dst.hi, $src.hi\n" 8996 "done:" 8997 %} 8998 8999 ins_encode %{ 9000 Label done; 9001 Register Rdst = $dst$$Register; 9002 Register Rsrc = $src$$Register; 9003 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9004 __ blsrl(Rdst, Rsrc); 9005 __ jccb(Assembler::carryClear, done); 9006 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9007 __ bind(done); 9008 %} 9009 9010 ins_pipe(ialu_reg); 9011 %} 9012 9013 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9014 %{ 9015 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); 9016 predicate(UseBMI1Instructions); 9017 effect(KILL cr, TEMP dst); 9018 9019 ins_cost(125); 9020 format %{ "MOVL $dst.hi, $src+4\n\t" 9021 "BLSRL $dst.lo, $src\n\t" 9022 "JNC done\n\t" 9023 "BLSRL $dst.hi, $src+4\n" 9024 "done:" 9025 %} 9026 9027 ins_encode %{ 9028 Label done; 9029 Register Rdst = $dst$$Register; 9030 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9031 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9032 __ blsrl(Rdst, $src$$Address); 9033 __ jccb(Assembler::carryClear, done); 9034 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9035 __ bind(done); 9036 %} 9037 9038 ins_pipe(ialu_reg_mem); 9039 %} 9040 9041 // Or Long Register with Register 9042 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9043 match(Set dst (OrL dst src)); 9044 effect(KILL cr); 9045 format %{ "OR $dst.lo,$src.lo\n\t" 9046 "OR $dst.hi,$src.hi" %} 9047 opcode(0x0B,0x0B); 9048 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9049 ins_pipe( ialu_reg_reg_long ); 9050 %} 9051 9052 
// Or Long Register with Immediate
// 64-bit OR on 32-bit x86: the long lives in a register pair, so the low
// and high 32-bit halves are OR'ed separately with the matching immediate
// halves. EFLAGS is clobbered by OR, hence KILL cr.
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
// Low word OR'ed from $mem, high word from $mem+4 (little-endian layout).
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// XOR with all-ones is bitwise complement, so emit NOT on each half.
// NOT does not write EFLAGS, which is why this variant has no KILL cr.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
// 64-bit XOR with a long loaded from memory: low word from $mem,
// high word from $mem+4.
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
// A 64-bit left shift by one is done as a 64-bit add of the value to
// itself: ADD doubles the low half and ADC doubles the high half while
// folding in the carry out of the low half.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
// Two repetitions of the ADD/ADC doubling sequence above.
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
// Three repetitions of the ADD/ADC doubling sequence.
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
__ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9170 __ addl($dst$$Register,$dst$$Register); 9171 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9172 __ addl($dst$$Register,$dst$$Register); 9173 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9174 %} 9175 ins_pipe( ialu_reg_long ); 9176 %} 9177 9178 // Shift Left Long by 1-31 9179 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9180 match(Set dst (LShiftL dst cnt)); 9181 effect(KILL cr); 9182 ins_cost(200); 9183 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9184 "SHL $dst.lo,$cnt" %} 9185 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9186 ins_encode( move_long_small_shift(dst,cnt) ); 9187 ins_pipe( ialu_reg_long ); 9188 %} 9189 9190 // Shift Left Long by 32-63 9191 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9192 match(Set dst (LShiftL dst cnt)); 9193 effect(KILL cr); 9194 ins_cost(300); 9195 format %{ "MOV $dst.hi,$dst.lo\n" 9196 "\tSHL $dst.hi,$cnt-32\n" 9197 "\tXOR $dst.lo,$dst.lo" %} 9198 opcode(0xC1, 0x4); /* C1 /4 ib */ 9199 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9200 ins_pipe( ialu_reg_long ); 9201 %} 9202 9203 // Shift Left Long by variable 9204 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9205 match(Set dst (LShiftL dst shift)); 9206 effect(KILL cr); 9207 ins_cost(500+200); 9208 size(17); 9209 format %{ "TEST $shift,32\n\t" 9210 "JEQ,s small\n\t" 9211 "MOV $dst.hi,$dst.lo\n\t" 9212 "XOR $dst.lo,$dst.lo\n" 9213 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9214 "SHL $dst.lo,$shift" %} 9215 ins_encode( shift_left_long( dst, shift ) ); 9216 ins_pipe( pipe_slow ); 9217 %} 9218 9219 // Shift Right Long by 1-31 9220 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9221 match(Set dst (URShiftL dst cnt)); 9222 effect(KILL cr); 9223 ins_cost(200); 9224 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9225 "SHR $dst.hi,$cnt" %} 9226 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, 
then C1 /5 ib */ 9227 ins_encode( move_long_small_shift(dst,cnt) ); 9228 ins_pipe( ialu_reg_long ); 9229 %} 9230 9231 // Shift Right Long by 32-63 9232 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9233 match(Set dst (URShiftL dst cnt)); 9234 effect(KILL cr); 9235 ins_cost(300); 9236 format %{ "MOV $dst.lo,$dst.hi\n" 9237 "\tSHR $dst.lo,$cnt-32\n" 9238 "\tXOR $dst.hi,$dst.hi" %} 9239 opcode(0xC1, 0x5); /* C1 /5 ib */ 9240 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9241 ins_pipe( ialu_reg_long ); 9242 %} 9243 9244 // Shift Right Long by variable 9245 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9246 match(Set dst (URShiftL dst shift)); 9247 effect(KILL cr); 9248 ins_cost(600); 9249 size(17); 9250 format %{ "TEST $shift,32\n\t" 9251 "JEQ,s small\n\t" 9252 "MOV $dst.lo,$dst.hi\n\t" 9253 "XOR $dst.hi,$dst.hi\n" 9254 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9255 "SHR $dst.hi,$shift" %} 9256 ins_encode( shift_right_long( dst, shift ) ); 9257 ins_pipe( pipe_slow ); 9258 %} 9259 9260 // Shift Right Long by 1-31 9261 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9262 match(Set dst (RShiftL dst cnt)); 9263 effect(KILL cr); 9264 ins_cost(200); 9265 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9266 "SAR $dst.hi,$cnt" %} 9267 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9268 ins_encode( move_long_small_shift(dst,cnt) ); 9269 ins_pipe( ialu_reg_long ); 9270 %} 9271 9272 // Shift Right Long by 32-63 9273 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9274 match(Set dst (RShiftL dst cnt)); 9275 effect(KILL cr); 9276 ins_cost(300); 9277 format %{ "MOV $dst.lo,$dst.hi\n" 9278 "\tSAR $dst.lo,$cnt-32\n" 9279 "\tSAR $dst.hi,31" %} 9280 opcode(0xC1, 0x7); /* C1 /7 ib */ 9281 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9282 ins_pipe( ialu_reg_long ); 9283 %} 9284 9285 // Shift Right arithmetic Long by variable 9286 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9287 
match(Set dst (RShiftL dst shift)); 9288 effect(KILL cr); 9289 ins_cost(600); 9290 size(18); 9291 format %{ "TEST $shift,32\n\t" 9292 "JEQ,s small\n\t" 9293 "MOV $dst.lo,$dst.hi\n\t" 9294 "SAR $dst.hi,31\n" 9295 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9296 "SAR $dst.hi,$shift" %} 9297 ins_encode( shift_right_arith_long( dst, shift ) ); 9298 ins_pipe( pipe_slow ); 9299 %} 9300 9301 9302 //----------Double Instructions------------------------------------------------ 9303 // Double Math 9304 9305 // Compare & branch 9306 9307 // P6 version of float compare, sets condition codes in EFLAGS 9308 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9309 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9310 match(Set cr (CmpD src1 src2)); 9311 effect(KILL rax); 9312 ins_cost(150); 9313 format %{ "FLD $src1\n\t" 9314 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9315 "JNP exit\n\t" 9316 "MOV ah,1 // saw a NaN, set CF\n\t" 9317 "SAHF\n" 9318 "exit:\tNOP // avoid branch to branch" %} 9319 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9320 ins_encode( Push_Reg_DPR(src1), 9321 OpcP, RegOpc(src2), 9322 cmpF_P6_fixup ); 9323 ins_pipe( pipe_slow ); 9324 %} 9325 9326 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9327 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9328 match(Set cr (CmpD src1 src2)); 9329 ins_cost(150); 9330 format %{ "FLD $src1\n\t" 9331 "FUCOMIP ST,$src2 // P6 instruction" %} 9332 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9333 ins_encode( Push_Reg_DPR(src1), 9334 OpcP, RegOpc(src2)); 9335 ins_pipe( pipe_slow ); 9336 %} 9337 9338 // Compare & branch 9339 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9340 predicate(UseSSE<=1); 9341 match(Set cr (CmpD src1 src2)); 9342 effect(KILL rax); 9343 ins_cost(200); 9344 format %{ "FLD $src1\n\t" 9345 "FCOMp $src2\n\t" 9346 "FNSTSW AX\n\t" 9347 "TEST AX,0x400\n\t" 9348 "JZ,s flags\n\t" 9349 "MOV AH,1\t# unordered treat as LT\n" 9350 
"flags:\tSAHF" %} 9351 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9352 ins_encode( Push_Reg_DPR(src1), 9353 OpcP, RegOpc(src2), 9354 fpu_flags); 9355 ins_pipe( pipe_slow ); 9356 %} 9357 9358 // Compare vs zero into -1,0,1 9359 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9360 predicate(UseSSE<=1); 9361 match(Set dst (CmpD3 src1 zero)); 9362 effect(KILL cr, KILL rax); 9363 ins_cost(280); 9364 format %{ "FTSTD $dst,$src1" %} 9365 opcode(0xE4, 0xD9); 9366 ins_encode( Push_Reg_DPR(src1), 9367 OpcS, OpcP, PopFPU, 9368 CmpF_Result(dst)); 9369 ins_pipe( pipe_slow ); 9370 %} 9371 9372 // Compare into -1,0,1 9373 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9374 predicate(UseSSE<=1); 9375 match(Set dst (CmpD3 src1 src2)); 9376 effect(KILL cr, KILL rax); 9377 ins_cost(300); 9378 format %{ "FCMPD $dst,$src1,$src2" %} 9379 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9380 ins_encode( Push_Reg_DPR(src1), 9381 OpcP, RegOpc(src2), 9382 CmpF_Result(dst)); 9383 ins_pipe( pipe_slow ); 9384 %} 9385 9386 // float compare and set condition codes in EFLAGS by XMM regs 9387 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9388 predicate(UseSSE>=2); 9389 match(Set cr (CmpD src1 src2)); 9390 ins_cost(145); 9391 format %{ "UCOMISD $src1,$src2\n\t" 9392 "JNP,s exit\n\t" 9393 "PUSHF\t# saw NaN, set CF\n\t" 9394 "AND [rsp], #0xffffff2b\n\t" 9395 "POPF\n" 9396 "exit:" %} 9397 ins_encode %{ 9398 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9399 emit_cmpfp_fixup(masm); 9400 %} 9401 ins_pipe( pipe_slow ); 9402 %} 9403 9404 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9405 predicate(UseSSE>=2); 9406 match(Set cr (CmpD src1 src2)); 9407 ins_cost(100); 9408 format %{ "UCOMISD $src1,$src2" %} 9409 ins_encode %{ 9410 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9411 %} 9412 ins_pipe( pipe_slow ); 9413 %} 9414 9415 // float compare and set condition codes in EFLAGS by XMM regs 9416 
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9417 predicate(UseSSE>=2); 9418 match(Set cr (CmpD src1 (LoadD src2))); 9419 ins_cost(145); 9420 format %{ "UCOMISD $src1,$src2\n\t" 9421 "JNP,s exit\n\t" 9422 "PUSHF\t# saw NaN, set CF\n\t" 9423 "AND [rsp], #0xffffff2b\n\t" 9424 "POPF\n" 9425 "exit:" %} 9426 ins_encode %{ 9427 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9428 emit_cmpfp_fixup(masm); 9429 %} 9430 ins_pipe( pipe_slow ); 9431 %} 9432 9433 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9434 predicate(UseSSE>=2); 9435 match(Set cr (CmpD src1 (LoadD src2))); 9436 ins_cost(100); 9437 format %{ "UCOMISD $src1,$src2" %} 9438 ins_encode %{ 9439 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9440 %} 9441 ins_pipe( pipe_slow ); 9442 %} 9443 9444 // Compare into -1,0,1 in XMM 9445 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9446 predicate(UseSSE>=2); 9447 match(Set dst (CmpD3 src1 src2)); 9448 effect(KILL cr); 9449 ins_cost(255); 9450 format %{ "UCOMISD $src1, $src2\n\t" 9451 "MOV $dst, #-1\n\t" 9452 "JP,s done\n\t" 9453 "JB,s done\n\t" 9454 "SETNE $dst\n\t" 9455 "MOVZB $dst, $dst\n" 9456 "done:" %} 9457 ins_encode %{ 9458 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9459 emit_cmpfp3(masm, $dst$$Register); 9460 %} 9461 ins_pipe( pipe_slow ); 9462 %} 9463 9464 // Compare into -1,0,1 in XMM and memory 9465 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9466 predicate(UseSSE>=2); 9467 match(Set dst (CmpD3 src1 (LoadD src2))); 9468 effect(KILL cr); 9469 ins_cost(275); 9470 format %{ "UCOMISD $src1, $src2\n\t" 9471 "MOV $dst, #-1\n\t" 9472 "JP,s done\n\t" 9473 "JB,s done\n\t" 9474 "SETNE $dst\n\t" 9475 "MOVZB $dst, $dst\n" 9476 "done:" %} 9477 ins_encode %{ 9478 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9479 emit_cmpfp3(masm, $dst$$Register); 9480 %} 9481 ins_pipe( pipe_slow ); 9482 %} 9483 9484 9485 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9486 predicate (UseSSE 
<=1); 9487 match(Set dst (SubD dst src)); 9488 9489 format %{ "FLD $src\n\t" 9490 "DSUBp $dst,ST" %} 9491 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9492 ins_cost(150); 9493 ins_encode( Push_Reg_DPR(src), 9494 OpcP, RegOpc(dst) ); 9495 ins_pipe( fpu_reg_reg ); 9496 %} 9497 9498 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9499 predicate (UseSSE <=1); 9500 match(Set dst (RoundDouble (SubD src1 src2))); 9501 ins_cost(250); 9502 9503 format %{ "FLD $src2\n\t" 9504 "DSUB ST,$src1\n\t" 9505 "FSTP_D $dst\t# D-round" %} 9506 opcode(0xD8, 0x5); 9507 ins_encode( Push_Reg_DPR(src2), 9508 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9509 ins_pipe( fpu_mem_reg_reg ); 9510 %} 9511 9512 9513 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9514 predicate (UseSSE <=1); 9515 match(Set dst (SubD dst (LoadD src))); 9516 ins_cost(150); 9517 9518 format %{ "FLD $src\n\t" 9519 "DSUBp $dst,ST" %} 9520 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9521 ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src), 9522 OpcP, RegOpc(dst), ClearInstMark ); 9523 ins_pipe( fpu_reg_mem ); 9524 %} 9525 9526 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9527 predicate (UseSSE<=1); 9528 match(Set dst (AbsD src)); 9529 ins_cost(100); 9530 format %{ "FABS" %} 9531 opcode(0xE1, 0xD9); 9532 ins_encode( OpcS, OpcP ); 9533 ins_pipe( fpu_reg_reg ); 9534 %} 9535 9536 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9537 predicate(UseSSE<=1); 9538 match(Set dst (NegD src)); 9539 ins_cost(100); 9540 format %{ "FCHS" %} 9541 opcode(0xE0, 0xD9); 9542 ins_encode( OpcS, OpcP ); 9543 ins_pipe( fpu_reg_reg ); 9544 %} 9545 9546 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9547 predicate(UseSSE<=1); 9548 match(Set dst (AddD dst src)); 9549 format %{ "FLD $src\n\t" 9550 "DADD $dst,ST" %} 9551 size(4); 9552 ins_cost(150); 9553 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9554 ins_encode( Push_Reg_DPR(src), 9555 OpcP, RegOpc(dst) ); 9556 ins_pipe( fpu_reg_reg ); 9557 %} 9558 9559 
9560 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9561 predicate(UseSSE<=1); 9562 match(Set dst (RoundDouble (AddD src1 src2))); 9563 ins_cost(250); 9564 9565 format %{ "FLD $src2\n\t" 9566 "DADD ST,$src1\n\t" 9567 "FSTP_D $dst\t# D-round" %} 9568 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9569 ins_encode( Push_Reg_DPR(src2), 9570 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9571 ins_pipe( fpu_mem_reg_reg ); 9572 %} 9573 9574 9575 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9576 predicate(UseSSE<=1); 9577 match(Set dst (AddD dst (LoadD src))); 9578 ins_cost(150); 9579 9580 format %{ "FLD $src\n\t" 9581 "DADDp $dst,ST" %} 9582 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9583 ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src), 9584 OpcP, RegOpc(dst), ClearInstMark ); 9585 ins_pipe( fpu_reg_mem ); 9586 %} 9587 9588 // add-to-memory 9589 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9590 predicate(UseSSE<=1); 9591 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9592 ins_cost(150); 9593 9594 format %{ "FLD_D $dst\n\t" 9595 "DADD ST,$src\n\t" 9596 "FST_D $dst" %} 9597 opcode(0xDD, 0x0); 9598 ins_encode( SetInstMark, Opcode(0xDD), RMopc_Mem(0x00,dst), 9599 Opcode(0xD8), RegOpc(src), ClearInstMark, 9600 SetInstMark, 9601 Opcode(0xDD), RMopc_Mem(0x03,dst), 9602 ClearInstMark); 9603 ins_pipe( fpu_reg_mem ); 9604 %} 9605 9606 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9607 predicate(UseSSE<=1); 9608 match(Set dst (AddD dst con)); 9609 ins_cost(125); 9610 format %{ "FLD1\n\t" 9611 "DADDp $dst,ST" %} 9612 ins_encode %{ 9613 __ fld1(); 9614 __ faddp($dst$$reg); 9615 %} 9616 ins_pipe(fpu_reg); 9617 %} 9618 9619 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9620 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9621 match(Set dst (AddD dst con)); 9622 ins_cost(200); 9623 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 
9624 "DADDp $dst,ST" %} 9625 ins_encode %{ 9626 __ fld_d($constantaddress($con)); 9627 __ faddp($dst$$reg); 9628 %} 9629 ins_pipe(fpu_reg_mem); 9630 %} 9631 9632 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9633 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9634 match(Set dst (RoundDouble (AddD src con))); 9635 ins_cost(200); 9636 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9637 "DADD ST,$src\n\t" 9638 "FSTP_D $dst\t# D-round" %} 9639 ins_encode %{ 9640 __ fld_d($constantaddress($con)); 9641 __ fadd($src$$reg); 9642 __ fstp_d(Address(rsp, $dst$$disp)); 9643 %} 9644 ins_pipe(fpu_mem_reg_con); 9645 %} 9646 9647 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9648 predicate(UseSSE<=1); 9649 match(Set dst (MulD dst src)); 9650 format %{ "FLD $src\n\t" 9651 "DMULp $dst,ST" %} 9652 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9653 ins_cost(150); 9654 ins_encode( Push_Reg_DPR(src), 9655 OpcP, RegOpc(dst) ); 9656 ins_pipe( fpu_reg_reg ); 9657 %} 9658 9659 // Strict FP instruction biases argument before multiply then 9660 // biases result to avoid double rounding of subnormals. 
9661 // 9662 // scale arg1 by multiplying arg1 by 2^(-15360) 9663 // load arg2 9664 // multiply scaled arg1 by arg2 9665 // rescale product by 2^(15360) 9666 // 9667 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9668 predicate( UseSSE<=1 && Compile::current()->has_method() ); 9669 match(Set dst (MulD dst src)); 9670 ins_cost(1); // Select this instruction for all FP double multiplies 9671 9672 format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t" 9673 "DMULp $dst,ST\n\t" 9674 "FLD $src\n\t" 9675 "DMULp $dst,ST\n\t" 9676 "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t" 9677 "DMULp $dst,ST\n\t" %} 9678 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9679 ins_encode( strictfp_bias1(dst), 9680 Push_Reg_DPR(src), 9681 OpcP, RegOpc(dst), 9682 strictfp_bias2(dst) ); 9683 ins_pipe( fpu_reg_reg ); 9684 %} 9685 9686 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9687 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9688 match(Set dst (MulD dst con)); 9689 ins_cost(200); 9690 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9691 "DMULp $dst,ST" %} 9692 ins_encode %{ 9693 __ fld_d($constantaddress($con)); 9694 __ fmulp($dst$$reg); 9695 %} 9696 ins_pipe(fpu_reg_mem); 9697 %} 9698 9699 9700 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9701 predicate( UseSSE<=1 ); 9702 match(Set dst (MulD dst (LoadD src))); 9703 ins_cost(200); 9704 format %{ "FLD_D $src\n\t" 9705 "DMULp $dst,ST" %} 9706 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9707 ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src), 9708 OpcP, RegOpc(dst), ClearInstMark ); 9709 ins_pipe( fpu_reg_mem ); 9710 %} 9711 9712 // 9713 // Cisc-alternate to reg-reg multiply 9714 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ 9715 predicate( UseSSE<=1 ); 9716 match(Set dst (MulD src (LoadD mem))); 9717 ins_cost(250); 9718 format %{ "FLD_D $mem\n\t" 9719 "DMUL ST,$src\n\t" 9720 "FSTP_D 
$dst" %} 9721 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9722 ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem), 9723 OpcReg_FPR(src), 9724 Pop_Reg_DPR(dst), ClearInstMark ); 9725 ins_pipe( fpu_reg_reg_mem ); 9726 %} 9727 9728 9729 // MACRO3 -- addDPR a mulDPR 9730 // This instruction is a '2-address' instruction in that the result goes 9731 // back to src2. This eliminates a move from the macro; possibly the 9732 // register allocator will have to add it back (and maybe not). 9733 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9734 predicate( UseSSE<=1 ); 9735 match(Set src2 (AddD (MulD src0 src1) src2)); 9736 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9737 "DMUL ST,$src1\n\t" 9738 "DADDp $src2,ST" %} 9739 ins_cost(250); 9740 opcode(0xDD); /* LoadD DD /0 */ 9741 ins_encode( Push_Reg_FPR(src0), 9742 FMul_ST_reg(src1), 9743 FAddP_reg_ST(src2) ); 9744 ins_pipe( fpu_reg_reg_reg ); 9745 %} 9746 9747 9748 // MACRO3 -- subDPR a mulDPR 9749 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9750 predicate( UseSSE<=1 ); 9751 match(Set src2 (SubD (MulD src0 src1) src2)); 9752 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9753 "DMUL ST,$src1\n\t" 9754 "DSUBRp $src2,ST" %} 9755 ins_cost(250); 9756 ins_encode( Push_Reg_FPR(src0), 9757 FMul_ST_reg(src1), 9758 Opcode(0xDE), Opc_plus(0xE0,src2)); 9759 ins_pipe( fpu_reg_reg_reg ); 9760 %} 9761 9762 9763 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9764 predicate( UseSSE<=1 ); 9765 match(Set dst (DivD dst src)); 9766 9767 format %{ "FLD $src\n\t" 9768 "FDIVp $dst,ST" %} 9769 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9770 ins_cost(150); 9771 ins_encode( Push_Reg_DPR(src), 9772 OpcP, RegOpc(dst) ); 9773 ins_pipe( fpu_reg_reg ); 9774 %} 9775 9776 // Strict FP instruction biases argument before division then 9777 // biases result, to avoid double rounding of subnormals. 
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // FIX: this instruct previously carried two predicate() clauses —
  // a bare "predicate (UseSSE<=1);" before match() and the combined one
  // after it. An instruct takes a single predicate; the extra weak clause
  // could select this strictfp bias sequence for non-method (stub) code.
  // Use the single combined predicate, matching strictfp_mulDPR_reg.
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all FP double divides

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  // Bias the dividend down, divide, then bias the quotient back up so
  // subnormal results are not double-rounded (see comment block above).
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Double-precision remainder via the x87 FPREM helper.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE2 variant: operands are bounced through the stack into the x87 unit,
// FPREM is iterated until reduction is complete (JP loop on C2), then the
// result is moved back to an XMM register and the FPU stack restored.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
            "\tMOVSD [ESP+0],$src1\n"
            "\tFLD_D [ESP+0]\n"
            "\tMOVSD [ESP+0],$src0\n"
            "\tFLD_D [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_D [ESP+0]\n"
            "\tMOVSD $dst,[ESP+0]\n"
            "\tADD ESP,8\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

instruct atanDPR_reg(regDPR dst,
regDPR src) %{ 9844 predicate (UseSSE<=1); 9845 match(Set dst(AtanD dst src)); 9846 format %{ "DATA $dst,$src" %} 9847 opcode(0xD9, 0xF3); 9848 ins_encode( Push_Reg_DPR(src), 9849 OpcP, OpcS, RegOpc(dst) ); 9850 ins_pipe( pipe_slow ); 9851 %} 9852 9853 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9854 predicate (UseSSE>=2); 9855 match(Set dst(AtanD dst src)); 9856 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9857 format %{ "DATA $dst,$src" %} 9858 opcode(0xD9, 0xF3); 9859 ins_encode( Push_SrcD(src), 9860 OpcP, OpcS, Push_ResultD(dst) ); 9861 ins_pipe( pipe_slow ); 9862 %} 9863 9864 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9865 predicate (UseSSE<=1); 9866 match(Set dst (SqrtD src)); 9867 format %{ "DSQRT $dst,$src" %} 9868 opcode(0xFA, 0xD9); 9869 ins_encode( Push_Reg_DPR(src), 9870 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9871 ins_pipe( pipe_slow ); 9872 %} 9873 9874 //-------------Float Instructions------------------------------- 9875 // Float Math 9876 9877 // Code for float compare: 9878 // fcompp(); 9879 // fwait(); fnstsw_ax(); 9880 // sahf(); 9881 // movl(dst, unordered_result); 9882 // jcc(Assembler::parity, exit); 9883 // movl(dst, less_result); 9884 // jcc(Assembler::below, exit); 9885 // movl(dst, equal_result); 9886 // jcc(Assembler::equal, exit); 9887 // movl(dst, greater_result); 9888 // exit: 9889 9890 // P6 version of float compare, sets condition codes in EFLAGS 9891 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9892 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9893 match(Set cr (CmpF src1 src2)); 9894 effect(KILL rax); 9895 ins_cost(150); 9896 format %{ "FLD $src1\n\t" 9897 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9898 "JNP exit\n\t" 9899 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 9900 "SAHF\n" 9901 "exit:\tNOP // avoid branch to branch" %} 9902 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9903 ins_encode( Push_Reg_DPR(src1), 9904 OpcP, RegOpc(src2), 9905 cmpF_P6_fixup ); 
9906 ins_pipe( pipe_slow ); 9907 %} 9908 9909 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 9910 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9911 match(Set cr (CmpF src1 src2)); 9912 ins_cost(100); 9913 format %{ "FLD $src1\n\t" 9914 "FUCOMIP ST,$src2 // P6 instruction" %} 9915 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9916 ins_encode( Push_Reg_DPR(src1), 9917 OpcP, RegOpc(src2)); 9918 ins_pipe( pipe_slow ); 9919 %} 9920 9921 9922 // Compare & branch 9923 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9924 predicate(UseSSE == 0); 9925 match(Set cr (CmpF src1 src2)); 9926 effect(KILL rax); 9927 ins_cost(200); 9928 format %{ "FLD $src1\n\t" 9929 "FCOMp $src2\n\t" 9930 "FNSTSW AX\n\t" 9931 "TEST AX,0x400\n\t" 9932 "JZ,s flags\n\t" 9933 "MOV AH,1\t# unordered treat as LT\n" 9934 "flags:\tSAHF" %} 9935 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9936 ins_encode( Push_Reg_DPR(src1), 9937 OpcP, RegOpc(src2), 9938 fpu_flags); 9939 ins_pipe( pipe_slow ); 9940 %} 9941 9942 // Compare vs zero into -1,0,1 9943 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9944 predicate(UseSSE == 0); 9945 match(Set dst (CmpF3 src1 zero)); 9946 effect(KILL cr, KILL rax); 9947 ins_cost(280); 9948 format %{ "FTSTF $dst,$src1" %} 9949 opcode(0xE4, 0xD9); 9950 ins_encode( Push_Reg_DPR(src1), 9951 OpcS, OpcP, PopFPU, 9952 CmpF_Result(dst)); 9953 ins_pipe( pipe_slow ); 9954 %} 9955 9956 // Compare into -1,0,1 9957 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 9958 predicate(UseSSE == 0); 9959 match(Set dst (CmpF3 src1 src2)); 9960 effect(KILL cr, KILL rax); 9961 ins_cost(300); 9962 format %{ "FCMPF $dst,$src1,$src2" %} 9963 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9964 ins_encode( Push_Reg_DPR(src1), 9965 OpcP, RegOpc(src2), 9966 CmpF_Result(dst)); 9967 ins_pipe( pipe_slow ); 9968 %} 9969 9970 // float compare and set condition codes in EFLAGS by XMM regs 
9971 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 9972 predicate(UseSSE>=1); 9973 match(Set cr (CmpF src1 src2)); 9974 ins_cost(145); 9975 format %{ "UCOMISS $src1,$src2\n\t" 9976 "JNP,s exit\n\t" 9977 "PUSHF\t# saw NaN, set CF\n\t" 9978 "AND [rsp], #0xffffff2b\n\t" 9979 "POPF\n" 9980 "exit:" %} 9981 ins_encode %{ 9982 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 9983 emit_cmpfp_fixup(masm); 9984 %} 9985 ins_pipe( pipe_slow ); 9986 %} 9987 9988 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 9989 predicate(UseSSE>=1); 9990 match(Set cr (CmpF src1 src2)); 9991 ins_cost(100); 9992 format %{ "UCOMISS $src1,$src2" %} 9993 ins_encode %{ 9994 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 9995 %} 9996 ins_pipe( pipe_slow ); 9997 %} 9998 9999 // float compare and set condition codes in EFLAGS by XMM regs 10000 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10001 predicate(UseSSE>=1); 10002 match(Set cr (CmpF src1 (LoadF src2))); 10003 ins_cost(165); 10004 format %{ "UCOMISS $src1,$src2\n\t" 10005 "JNP,s exit\n\t" 10006 "PUSHF\t# saw NaN, set CF\n\t" 10007 "AND [rsp], #0xffffff2b\n\t" 10008 "POPF\n" 10009 "exit:" %} 10010 ins_encode %{ 10011 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10012 emit_cmpfp_fixup(masm); 10013 %} 10014 ins_pipe( pipe_slow ); 10015 %} 10016 10017 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10018 predicate(UseSSE>=1); 10019 match(Set cr (CmpF src1 (LoadF src2))); 10020 ins_cost(100); 10021 format %{ "UCOMISS $src1,$src2" %} 10022 ins_encode %{ 10023 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10024 %} 10025 ins_pipe( pipe_slow ); 10026 %} 10027 10028 // Compare into -1,0,1 in XMM 10029 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10030 predicate(UseSSE>=1); 10031 match(Set dst (CmpF3 src1 src2)); 10032 effect(KILL cr); 10033 ins_cost(255); 10034 format %{ "UCOMISS $src1, $src2\n\t" 10035 "MOV $dst, #-1\n\t" 10036 "JP,s done\n\t" 10037 "JB,s 
done\n\t" 10038 "SETNE $dst\n\t" 10039 "MOVZB $dst, $dst\n" 10040 "done:" %} 10041 ins_encode %{ 10042 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10043 emit_cmpfp3(masm, $dst$$Register); 10044 %} 10045 ins_pipe( pipe_slow ); 10046 %} 10047 10048 // Compare into -1,0,1 in XMM and memory 10049 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10050 predicate(UseSSE>=1); 10051 match(Set dst (CmpF3 src1 (LoadF src2))); 10052 effect(KILL cr); 10053 ins_cost(275); 10054 format %{ "UCOMISS $src1, $src2\n\t" 10055 "MOV $dst, #-1\n\t" 10056 "JP,s done\n\t" 10057 "JB,s done\n\t" 10058 "SETNE $dst\n\t" 10059 "MOVZB $dst, $dst\n" 10060 "done:" %} 10061 ins_encode %{ 10062 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10063 emit_cmpfp3(masm, $dst$$Register); 10064 %} 10065 ins_pipe( pipe_slow ); 10066 %} 10067 10068 // Spill to obtain 24-bit precision 10069 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10070 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10071 match(Set dst (SubF src1 src2)); 10072 10073 format %{ "FSUB $dst,$src1 - $src2" %} 10074 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10075 ins_encode( Push_Reg_FPR(src1), 10076 OpcReg_FPR(src2), 10077 Pop_Mem_FPR(dst) ); 10078 ins_pipe( fpu_mem_reg_reg ); 10079 %} 10080 // 10081 // This instruction does not round to 24-bits 10082 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10083 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10084 match(Set dst (SubF dst src)); 10085 10086 format %{ "FSUB $dst,$src" %} 10087 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10088 ins_encode( Push_Reg_FPR(src), 10089 OpcP, RegOpc(dst) ); 10090 ins_pipe( fpu_reg_reg ); 10091 %} 10092 10093 // Spill to obtain 24-bit precision 10094 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10095 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10096 match(Set dst (AddF src1 src2)); 10097 10098 format 
%{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Absolute value; operands are constrained to FPR1 (top of FPU stack),
// where FABS operates.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Negation; operands are constrained to FPR1 (top of FPU stack),
// where FCHS operates.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark);
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark);
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark );
  ins_pipe( fpu_mem_mem_mem );
%}
// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to
// obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder: SSE has no remainder instruction, so the operands
// are spilled to the stack and computed with the FPU's FPREM loop
// (see the format string), then the result is moved back to an XMM reg.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!

instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB
ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // FPR1 (the FPU top-of-stack) can be stored directly; any other FPU
    // stack slot must first be loaded to the top, then stored with a pop.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSD2SI yields 0x80000000 on overflow/NaN; that sentinel triggers
    // the slow-path call into the d2i_wrapper stub for Java semantics.
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    // FISTP rounds per the FPU control word; switch to truncation first.
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x80000000:00000000 is the FPU's overflow/NaN sentinel; take the
    // slow path through the d2l_wrapper stub in that case.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSS2SI yields the 0x80000000 sentinel on overflow/NaN;
    // take the slow path through the d2i_wrapper stub in that case.
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    // FISTP rounds per the FPU control word; switch to truncation first.
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x80000000:00000000 is the FPU's overflow/NaN sentinel; take the
    // slow path through the d2l_wrapper stub in that case.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Alternate i2d when UseXmmI2D is set: MOVD + CVTDQ2PD instead of CVTSI2SD.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  // Matches only when the input is an (AndI x 255), i.e. a byte-range value.
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst), ClearInstMark);
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F ));
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Alternate i2f when UseXmmI2F is set: MOVD + CVTDQ2PS instead of CVTSI2SS.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy into both halves, arithmetic-shift the hi
// half to replicate the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD
ESP,8\n\t" 11099 "FSTP_S $dst\t# F-round" %} 11100 opcode(0xDF, 0x5); /* DF /5 */ 11101 ins_encode(convert_long_double(src), Pop_Mem_FPR(dst)); 11102 ins_pipe( pipe_slow ); 11103 %} 11104 11105 instruct convL2I_reg( rRegI dst, eRegL src ) %{ 11106 match(Set dst (ConvL2I src)); 11107 effect( DEF dst, USE src ); 11108 format %{ "MOV $dst,$src.lo" %} 11109 ins_encode(enc_CopyL_Lo(dst,src)); 11110 ins_pipe( ialu_reg_reg ); 11111 %} 11112 11113 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{ 11114 match(Set dst (MoveF2I src)); 11115 effect( DEF dst, USE src ); 11116 ins_cost(100); 11117 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %} 11118 ins_encode %{ 11119 __ movl($dst$$Register, Address(rsp, $src$$disp)); 11120 %} 11121 ins_pipe( ialu_reg_mem ); 11122 %} 11123 11124 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{ 11125 predicate(UseSSE==0); 11126 match(Set dst (MoveF2I src)); 11127 effect( DEF dst, USE src ); 11128 11129 ins_cost(125); 11130 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %} 11131 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 11132 ins_pipe( fpu_mem_reg ); 11133 %} 11134 11135 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{ 11136 predicate(UseSSE>=1); 11137 match(Set dst (MoveF2I src)); 11138 effect( DEF dst, USE src ); 11139 11140 ins_cost(95); 11141 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %} 11142 ins_encode %{ 11143 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); 11144 %} 11145 ins_pipe( pipe_slow ); 11146 %} 11147 11148 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{ 11149 predicate(UseSSE>=2); 11150 match(Set dst (MoveF2I src)); 11151 effect( DEF dst, USE src ); 11152 ins_cost(85); 11153 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} 11154 ins_encode %{ 11155 __ movdl($dst$$Register, $src$$XMMRegister); 11156 %} 11157 ins_pipe( pipe_slow ); 11158 %} 11159 11160 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ 11161 match(Set dst (MoveI2F src)); 11162 effect( DEF dst, USE src 
); 11163 11164 ins_cost(100); 11165 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} 11166 ins_encode %{ 11167 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11168 %} 11169 ins_pipe( ialu_mem_reg ); 11170 %} 11171 11172 11173 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11174 predicate(UseSSE==0); 11175 match(Set dst (MoveI2F src)); 11176 effect(DEF dst, USE src); 11177 11178 ins_cost(125); 11179 format %{ "FLD_S $src\n\t" 11180 "FSTP $dst\t# MoveI2F_stack_reg" %} 11181 opcode(0xD9); /* D9 /0, FLD m32real */ 11182 ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src), 11183 Pop_Reg_FPR(dst), ClearInstMark ); 11184 ins_pipe( fpu_reg_mem ); 11185 %} 11186 11187 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11188 predicate(UseSSE>=1); 11189 match(Set dst (MoveI2F src)); 11190 effect( DEF dst, USE src ); 11191 11192 ins_cost(95); 11193 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11194 ins_encode %{ 11195 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11196 %} 11197 ins_pipe( pipe_slow ); 11198 %} 11199 11200 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11201 predicate(UseSSE>=2); 11202 match(Set dst (MoveI2F src)); 11203 effect( DEF dst, USE src ); 11204 11205 ins_cost(85); 11206 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11207 ins_encode %{ 11208 __ movdl($dst$$XMMRegister, $src$$Register); 11209 %} 11210 ins_pipe( pipe_slow ); 11211 %} 11212 11213 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11214 match(Set dst (MoveD2L src)); 11215 effect(DEF dst, USE src); 11216 11217 ins_cost(250); 11218 format %{ "MOV $dst.lo,$src\n\t" 11219 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11220 opcode(0x8B, 0x8B); 11221 ins_encode( SetInstMark, OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src), ClearInstMark); 11222 ins_pipe( ialu_mem_long_reg ); 11223 %} 11224 11225 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11226 predicate(UseSSE<=1); 11227 match(Set dst (MoveD2L src)); 11228 
effect(DEF dst, USE src); 11229 11230 ins_cost(125); 11231 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11232 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11233 ins_pipe( fpu_mem_reg ); 11234 %} 11235 11236 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11237 predicate(UseSSE>=2); 11238 match(Set dst (MoveD2L src)); 11239 effect(DEF dst, USE src); 11240 ins_cost(95); 11241 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11242 ins_encode %{ 11243 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11244 %} 11245 ins_pipe( pipe_slow ); 11246 %} 11247 11248 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11249 predicate(UseSSE>=2); 11250 match(Set dst (MoveD2L src)); 11251 effect(DEF dst, USE src, TEMP tmp); 11252 ins_cost(85); 11253 format %{ "MOVD $dst.lo,$src\n\t" 11254 "PSHUFLW $tmp,$src,0x4E\n\t" 11255 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11256 ins_encode %{ 11257 __ movdl($dst$$Register, $src$$XMMRegister); 11258 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11259 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11260 %} 11261 ins_pipe( pipe_slow ); 11262 %} 11263 11264 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11265 match(Set dst (MoveL2D src)); 11266 effect(DEF dst, USE src); 11267 11268 ins_cost(200); 11269 format %{ "MOV $dst,$src.lo\n\t" 11270 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11271 opcode(0x89, 0x89); 11272 ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark ); 11273 ins_pipe( ialu_mem_long_reg ); 11274 %} 11275 11276 11277 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11278 predicate(UseSSE<=1); 11279 match(Set dst (MoveL2D src)); 11280 effect(DEF dst, USE src); 11281 ins_cost(125); 11282 11283 format %{ "FLD_D $src\n\t" 11284 "FSTP $dst\t# MoveL2D_stack_reg" %} 11285 opcode(0xDD); /* DD /0, FLD m64real */ 11286 ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src), 11287 Pop_Reg_DPR(dst), 
ClearInstMark ); 11288 ins_pipe( fpu_reg_mem ); 11289 %} 11290 11291 11292 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11293 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 11294 match(Set dst (MoveL2D src)); 11295 effect(DEF dst, USE src); 11296 11297 ins_cost(95); 11298 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11299 ins_encode %{ 11300 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11301 %} 11302 ins_pipe( pipe_slow ); 11303 %} 11304 11305 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11306 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11307 match(Set dst (MoveL2D src)); 11308 effect(DEF dst, USE src); 11309 11310 ins_cost(95); 11311 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11312 ins_encode %{ 11313 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11314 %} 11315 ins_pipe( pipe_slow ); 11316 %} 11317 11318 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11319 predicate(UseSSE>=2); 11320 match(Set dst (MoveL2D src)); 11321 effect(TEMP dst, USE src, TEMP tmp); 11322 ins_cost(85); 11323 format %{ "MOVD $dst,$src.lo\n\t" 11324 "MOVD $tmp,$src.hi\n\t" 11325 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11326 ins_encode %{ 11327 __ movdl($dst$$XMMRegister, $src$$Register); 11328 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11329 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11330 %} 11331 ins_pipe( pipe_slow ); 11332 %} 11333 11334 //----------------------------- CompressBits/ExpandBits ------------------------ 11335 11336 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ 11337 predicate(n->bottom_type()->isa_long()); 11338 match(Set dst (CompressBits src mask)); 11339 effect(TEMP rtmp, TEMP xtmp, KILL cr); 11340 format %{ "compress_bits $dst, $src, $mask\t! 
using $rtmp and $xtmp as TEMP" %} 11341 ins_encode %{ 11342 Label exit, partail_result; 11343 // Parallely extract both upper and lower 32 bits of source into destination register pair. 11344 // Merge the results of upper and lower destination registers such that upper destination 11345 // results are contiguously laid out after the lower destination result. 11346 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 11347 __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); 11348 __ popcntl($rtmp$$Register, $mask$$Register); 11349 // Skip merging if bit count of lower mask register is equal to 32 (register size). 11350 __ cmpl($rtmp$$Register, 32); 11351 __ jccb(Assembler::equal, exit); 11352 // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. 11353 __ movdl($xtmp$$XMMRegister, $rtmp$$Register); 11354 // Shift left the contents of upper destination register by true bit count of lower mask register 11355 // and merge with lower destination register. 11356 __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11357 __ orl($dst$$Register, $rtmp$$Register); 11358 __ movdl($rtmp$$Register, $xtmp$$XMMRegister); 11359 // Zero out upper destination register if true bit count of lower 32 bit mask is zero 11360 // since contents of upper destination have already been copied to lower destination 11361 // register. 11362 __ cmpl($rtmp$$Register, 0); 11363 __ jccb(Assembler::greater, partail_result); 11364 __ movl(HIGH_FROM_LOW($dst$$Register), 0); 11365 __ jmp(exit); 11366 __ bind(partail_result); 11367 // Perform right shift over upper destination register to move out bits already copied 11368 // to lower destination register. 
11369 __ subl($rtmp$$Register, 32); 11370 __ negl($rtmp$$Register); 11371 __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11372 __ bind(exit); 11373 %} 11374 ins_pipe( pipe_slow ); 11375 %} 11376 11377 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ 11378 predicate(n->bottom_type()->isa_long()); 11379 match(Set dst (ExpandBits src mask)); 11380 effect(TEMP rtmp, TEMP xtmp, KILL cr); 11381 format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %} 11382 ins_encode %{ 11383 // Extraction operation sequentially reads the bits from source register starting from LSB 11384 // and lays them out into destination register at bit locations corresponding to true bits 11385 // in mask register. Thus number of source bits read are equal to combined true bit count 11386 // of mask register pair. 11387 Label exit, mask_clipping; 11388 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 11389 __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); 11390 __ popcntl($rtmp$$Register, $mask$$Register); 11391 // If true bit count of lower mask register is 32 then none of bit of lower source register 11392 // will feed to upper destination register. 11393 __ cmpl($rtmp$$Register, 32); 11394 __ jccb(Assembler::equal, exit); 11395 // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. 11396 __ movdl($xtmp$$XMMRegister, $rtmp$$Register); 11397 // Shift right the contents of lower source register to remove already consumed bits. 11398 __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register); 11399 // Extract the bits from lower source register starting from LSB under the influence 11400 // of upper mask register. 
11401 __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register)); 11402 __ movdl($rtmp$$Register, $xtmp$$XMMRegister); 11403 __ subl($rtmp$$Register, 32); 11404 __ negl($rtmp$$Register); 11405 __ movdl($xtmp$$XMMRegister, $mask$$Register); 11406 __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register)); 11407 // Clear the set bits in upper mask register which have been used to extract the contents 11408 // from lower source register. 11409 __ bind(mask_clipping); 11410 __ blsrl($mask$$Register, $mask$$Register); 11411 __ decrementl($rtmp$$Register, 1); 11412 __ jccb(Assembler::greater, mask_clipping); 11413 // Starting from LSB extract the bits from upper source register under the influence of 11414 // remaining set bits in upper mask register. 11415 __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register); 11416 // Merge the partial results extracted from lower and upper source register bits. 11417 __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11418 __ movdl($mask$$Register, $xtmp$$XMMRegister); 11419 __ bind(exit); 11420 %} 11421 ins_pipe( pipe_slow ); 11422 %} 11423 11424 // ======================================================================= 11425 // Fast clearing of an array 11426 // Small non-constant length ClearArray for non-AVX512 targets. 
11427 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11428 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2)); 11429 match(Set dummy (ClearArray cnt base)); 11430 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); 11431 11432 format %{ $$template 11433 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11434 $$emit$$"CMP InitArrayShortSize,rcx\n\t" 11435 $$emit$$"JG LARGE\n\t" 11436 $$emit$$"SHL ECX, 1\n\t" 11437 $$emit$$"DEC ECX\n\t" 11438 $$emit$$"JS DONE\t# Zero length\n\t" 11439 $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t" 11440 $$emit$$"DEC ECX\n\t" 11441 $$emit$$"JGE LOOP\n\t" 11442 $$emit$$"JMP DONE\n\t" 11443 $$emit$$"# LARGE:\n\t" 11444 if (UseFastStosb) { 11445 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11446 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11447 } else if (UseXMMForObjInit) { 11448 $$emit$$"MOV RDI,RAX\n\t" 11449 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11450 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11451 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11452 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11453 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11454 $$emit$$"ADD 0x40,RAX\n\t" 11455 $$emit$$"# L_zero_64_bytes:\n\t" 11456 $$emit$$"SUB 0x8,RCX\n\t" 11457 $$emit$$"JGE L_loop\n\t" 11458 $$emit$$"ADD 0x4,RCX\n\t" 11459 $$emit$$"JL L_tail\n\t" 11460 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11461 $$emit$$"ADD 0x20,RAX\n\t" 11462 $$emit$$"SUB 0x4,RCX\n\t" 11463 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11464 $$emit$$"ADD 0x4,RCX\n\t" 11465 $$emit$$"JLE L_end\n\t" 11466 $$emit$$"DEC RCX\n\t" 11467 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11468 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11469 $$emit$$"ADD 0x8,RAX\n\t" 11470 $$emit$$"DEC RCX\n\t" 11471 $$emit$$"JGE L_sloop\n\t" 11472 $$emit$$"# L_end:\n\t" 11473 } else { 11474 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11475 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11476 } 11477 $$emit$$"# DONE" 11478 %} 11479 
ins_encode %{ 11480 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11481 $tmp$$XMMRegister, false, knoreg); 11482 %} 11483 ins_pipe( pipe_slow ); 11484 %} 11485 11486 // Small non-constant length ClearArray for AVX512 targets. 11487 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11488 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2)); 11489 match(Set dummy (ClearArray cnt base)); 11490 ins_cost(125); 11491 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); 11492 11493 format %{ $$template 11494 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11495 $$emit$$"CMP InitArrayShortSize,rcx\n\t" 11496 $$emit$$"JG LARGE\n\t" 11497 $$emit$$"SHL ECX, 1\n\t" 11498 $$emit$$"DEC ECX\n\t" 11499 $$emit$$"JS DONE\t# Zero length\n\t" 11500 $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t" 11501 $$emit$$"DEC ECX\n\t" 11502 $$emit$$"JGE LOOP\n\t" 11503 $$emit$$"JMP DONE\n\t" 11504 $$emit$$"# LARGE:\n\t" 11505 if (UseFastStosb) { 11506 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11507 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11508 } else if (UseXMMForObjInit) { 11509 $$emit$$"MOV RDI,RAX\n\t" 11510 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11511 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11512 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11513 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11514 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11515 $$emit$$"ADD 0x40,RAX\n\t" 11516 $$emit$$"# L_zero_64_bytes:\n\t" 11517 $$emit$$"SUB 0x8,RCX\n\t" 11518 $$emit$$"JGE L_loop\n\t" 11519 $$emit$$"ADD 0x4,RCX\n\t" 11520 $$emit$$"JL L_tail\n\t" 11521 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11522 $$emit$$"ADD 0x20,RAX\n\t" 11523 $$emit$$"SUB 0x4,RCX\n\t" 11524 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11525 $$emit$$"ADD 0x4,RCX\n\t" 11526 $$emit$$"JLE L_end\n\t" 11527 $$emit$$"DEC RCX\n\t" 11528 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11529 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11530 $$emit$$"ADD 
0x8,RAX\n\t" 11531 $$emit$$"DEC RCX\n\t" 11532 $$emit$$"JGE L_sloop\n\t" 11533 $$emit$$"# L_end:\n\t" 11534 } else { 11535 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11536 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11537 } 11538 $$emit$$"# DONE" 11539 %} 11540 ins_encode %{ 11541 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11542 $tmp$$XMMRegister, false, $ktmp$$KRegister); 11543 %} 11544 ins_pipe( pipe_slow ); 11545 %} 11546 11547 // Large non-constant length ClearArray for non-AVX512 targets. 11548 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11549 predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large()); 11550 match(Set dummy (ClearArray cnt base)); 11551 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); 11552 format %{ $$template 11553 if (UseFastStosb) { 11554 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11555 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11556 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11557 } else if (UseXMMForObjInit) { 11558 $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t" 11559 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11560 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11561 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11562 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11563 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11564 $$emit$$"ADD 0x40,RAX\n\t" 11565 $$emit$$"# L_zero_64_bytes:\n\t" 11566 $$emit$$"SUB 0x8,RCX\n\t" 11567 $$emit$$"JGE L_loop\n\t" 11568 $$emit$$"ADD 0x4,RCX\n\t" 11569 $$emit$$"JL L_tail\n\t" 11570 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11571 $$emit$$"ADD 0x20,RAX\n\t" 11572 $$emit$$"SUB 0x4,RCX\n\t" 11573 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11574 $$emit$$"ADD 0x4,RCX\n\t" 11575 $$emit$$"JLE L_end\n\t" 11576 $$emit$$"DEC RCX\n\t" 11577 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11578 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11579 $$emit$$"ADD 0x8,RAX\n\t" 11580 $$emit$$"DEC RCX\n\t" 11581 $$emit$$"JGE L_sloop\n\t" 
11582 $$emit$$"# L_end:\n\t" 11583 } else { 11584 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11585 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11586 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11587 } 11588 $$emit$$"# DONE" 11589 %} 11590 ins_encode %{ 11591 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11592 $tmp$$XMMRegister, true, knoreg); 11593 %} 11594 ins_pipe( pipe_slow ); 11595 %} 11596 11597 // Large non-constant length ClearArray for AVX512 targets. 11598 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11599 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large()); 11600 match(Set dummy (ClearArray cnt base)); 11601 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); 11602 format %{ $$template 11603 if (UseFastStosb) { 11604 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11605 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11606 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11607 } else if (UseXMMForObjInit) { 11608 $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t" 11609 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11610 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11611 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11612 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11613 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11614 $$emit$$"ADD 0x40,RAX\n\t" 11615 $$emit$$"# L_zero_64_bytes:\n\t" 11616 $$emit$$"SUB 0x8,RCX\n\t" 11617 $$emit$$"JGE L_loop\n\t" 11618 $$emit$$"ADD 0x4,RCX\n\t" 11619 $$emit$$"JL L_tail\n\t" 11620 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11621 $$emit$$"ADD 0x20,RAX\n\t" 11622 $$emit$$"SUB 0x4,RCX\n\t" 11623 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11624 $$emit$$"ADD 0x4,RCX\n\t" 11625 $$emit$$"JLE L_end\n\t" 11626 $$emit$$"DEC RCX\n\t" 11627 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11628 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11629 $$emit$$"ADD 0x8,RAX\n\t" 11630 $$emit$$"DEC RCX\n\t" 11631 $$emit$$"JGE L_sloop\n\t" 11632 
$$emit$$"# L_end:\n\t" 11633 } else { 11634 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11635 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11636 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11637 } 11638 $$emit$$"# DONE" 11639 %} 11640 ins_encode %{ 11641 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11642 $tmp$$XMMRegister, true, $ktmp$$KRegister); 11643 %} 11644 ins_pipe( pipe_slow ); 11645 %} 11646 11647 // Small constant length ClearArray for AVX512 targets. 11648 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr) 11649 %{ 11650 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl()); 11651 match(Set dummy (ClearArray cnt base)); 11652 ins_cost(100); 11653 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr); 11654 format %{ "clear_mem_imm $base , $cnt \n\t" %} 11655 ins_encode %{ 11656 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister); 11657 %} 11658 ins_pipe(pipe_slow); 11659 %} 11660 11661 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11662 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11663 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11664 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11665 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11666 11667 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11668 ins_encode %{ 11669 __ string_compare($str1$$Register, $str2$$Register, 11670 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11671 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg); 11672 %} 11673 ins_pipe( pipe_slow ); 11674 %} 11675 11676 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11677 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg 
cr) %{ 11678 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11679 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11680 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11681 11682 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11683 ins_encode %{ 11684 __ string_compare($str1$$Register, $str2$$Register, 11685 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11686 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister); 11687 %} 11688 ins_pipe( pipe_slow ); 11689 %} 11690 11691 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11692 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11693 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11694 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11695 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11696 11697 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11698 ins_encode %{ 11699 __ string_compare($str1$$Register, $str2$$Register, 11700 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11701 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg); 11702 %} 11703 ins_pipe( pipe_slow ); 11704 %} 11705 11706 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11707 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ 11708 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11709 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11710 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11711 11712 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11713 ins_encode %{ 11714 __ 
string_compare($str1$$Register, $str2$$Register, 11715 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11716 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister); 11717 %} 11718 ins_pipe( pipe_slow ); 11719 %} 11720 11721 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11722 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11723 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11724 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11725 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11726 11727 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11728 ins_encode %{ 11729 __ string_compare($str1$$Register, $str2$$Register, 11730 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11731 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg); 11732 %} 11733 ins_pipe( pipe_slow ); 11734 %} 11735 11736 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11737 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ 11738 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11739 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11740 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11741 11742 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11743 ins_encode %{ 11744 __ string_compare($str1$$Register, $str2$$Register, 11745 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11746 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister); 11747 %} 11748 ins_pipe( pipe_slow ); 11749 %} 11750 11751 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, 11752 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11753 predicate(!VM_Version::supports_avx512vlbw() && 
                                 ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // Note the swapped operand order: $str2/$cnt2 are passed first for the
    // UL (UTF-16 vs Latin1) encoding.
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 (VL+BW) variant of the UL string compare: same shape as above but
// additionally reserves a mask register ($ktmp) for the masked vector loop.
instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // Swapped operand order ($str2 first), matching the non-EVEX UL variant.
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // first arg 'false' selects string (not array) mode of arrays_equals.
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe( pipe_slow );
%}

// AVX-512 variant of string_equals; adds a mask register temp.
instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}


// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    // UU threshold is 8 (chars) vs 16 for LL (bytes): same 16-byte vector width.
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length (non-constant cnt2) substring search, Latin1/Latin1.
// cnt2 passed as -1 tells string_indexof the substring length is dynamic.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length substring search, UTF-16/UTF-16.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length substring search, UTF-16 haystack / Latin1 needle.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Single-character search in a UTF-16 string.
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                             eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Single-character search in a Latin1 string.
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __
stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// fast array equals
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    // first arg 'true' selects array mode (lengths loaded from headers).
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 variant of array_equalsB; adds a mask register temp.
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// char[] (UU) array equality.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 variant of array_equalsC.
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Count leading positive bytes (used by String coders).
instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                         regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512+BMI2 variant of count_positives; uses two mask register temps.
instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
                              regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}


// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                         regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512+BMI2 variant of string_compress; uses two mask register temps.
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                              regD tmp3, regD tmp4, kReg ktmp1, kReg
ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512+BMI2 variant of string_inflate; adds a mask register temp.
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                             regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    // last arg 'false' = ISO-8859-1 mode (chars <= 0xFF allowed).
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ASCII
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                            regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                            eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    // last arg 'true' = ASCII mode (chars <= 0x7F allowed).
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions

instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero is encoded as TEST reg,reg (shorter than CMP reg,0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & con) == 0 folds into a single TEST with an immediate.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( SetInstMark, OpcSErm( op1, op2 ), Con8or32( op2 ), ClearInstMark );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF), ClearInstMark );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): m1 appears to be unused below — verify before removing.
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index), noreg);
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label
// defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Carry/parity-aware unordered compare branch: an extra JP is emitted because
// the UCF flags encoding uses the parity bit for the unordered case.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant matched directly against a compare-with-zero; the flags result is
// consumed, so EDI ($result) is a pure kill and no XOR is forced.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx,
                                      eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
ins_encode %{ 12731 Label* L = $labl$$label; 12732 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12733 %} 12734 ins_pipe( pipe_jcc ); 12735 ins_short_branch(1); 12736 %} 12737 12738 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 12739 match(If cop cmp); 12740 effect(USE labl); 12741 12742 ins_cost(300); 12743 format %{ $$template 12744 if ($cop$$cmpcode == Assembler::notEqual) { 12745 $$emit$$"JP,u,s $labl\n\t" 12746 $$emit$$"J$cop,u,s $labl" 12747 } else { 12748 $$emit$$"JP,u,s done\n\t" 12749 $$emit$$"J$cop,u,s $labl\n\t" 12750 $$emit$$"done:" 12751 } 12752 %} 12753 size(4); 12754 ins_encode %{ 12755 Label* l = $labl$$label; 12756 if ($cop$$cmpcode == Assembler::notEqual) { 12757 __ jccb(Assembler::parity, *l); 12758 __ jccb(Assembler::notEqual, *l); 12759 } else if ($cop$$cmpcode == Assembler::equal) { 12760 Label done; 12761 __ jccb(Assembler::parity, done); 12762 __ jccb(Assembler::equal, *l); 12763 __ bind(done); 12764 } else { 12765 ShouldNotReachHere(); 12766 } 12767 %} 12768 ins_pipe(pipe_jcc); 12769 ins_short_branch(1); 12770 %} 12771 12772 // ============================================================================ 12773 // Long Compare 12774 // 12775 // Currently we hold longs in 2 registers. Comparing such values efficiently 12776 // is tricky. The flavor of compare used depends on whether we are testing 12777 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit. 12778 // The GE test is the negated LT test. The LE test can be had by commuting 12779 // the operands (yielding a GE test) and then negating; negate again for the 12780 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the 12781 // NE test is negated from that. 12782 12783 // Due to a shortcoming in the ADLC, it mixes up expressions like: 12784 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the 12785 // difference between 'Y' and '0L'. 
The tree-matches for the CmpI sections 12786 // are collapsed internally in the ADLC's dfa-gen code. The match for 12787 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the 12788 // foo match ends up with the wrong leaf. One fix is to not match both 12789 // reg-reg and reg-zero forms of long-compare. This is unfortunate because 12790 // both forms beat the trinary form of long-compare and both are very useful 12791 // on Intel which has so few registers. 12792 12793 // Manifest a CmpL result in an integer register. Very painful. 12794 // This is the test to avoid. 12795 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{ 12796 match(Set dst (CmpL3 src1 src2)); 12797 effect( KILL flags ); 12798 ins_cost(1000); 12799 format %{ "XOR $dst,$dst\n\t" 12800 "CMP $src1.hi,$src2.hi\n\t" 12801 "JLT,s m_one\n\t" 12802 "JGT,s p_one\n\t" 12803 "CMP $src1.lo,$src2.lo\n\t" 12804 "JB,s m_one\n\t" 12805 "JEQ,s done\n" 12806 "p_one:\tINC $dst\n\t" 12807 "JMP,s done\n" 12808 "m_one:\tDEC $dst\n" 12809 "done:" %} 12810 ins_encode %{ 12811 Label p_one, m_one, done; 12812 __ xorptr($dst$$Register, $dst$$Register); 12813 __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register)); 12814 __ jccb(Assembler::less, m_one); 12815 __ jccb(Assembler::greater, p_one); 12816 __ cmpl($src1$$Register, $src2$$Register); 12817 __ jccb(Assembler::below, m_one); 12818 __ jccb(Assembler::equal, done); 12819 __ bind(p_one); 12820 __ incrementl($dst$$Register); 12821 __ jmpb(done); 12822 __ bind(m_one); 12823 __ decrementl($dst$$Register); 12824 __ bind(done); 12825 %} 12826 ins_pipe( pipe_slow ); 12827 %} 12828 12829 //====== 12830 // Manifest a CmpL result in the normal flags. Only good for LT or GE 12831 // compares. Can be used for LE or GT compares by reversing arguments. 12832 // NOT GOOD FOR EQ/NE tests. 
12833 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{ 12834 match( Set flags (CmpL src zero )); 12835 ins_cost(100); 12836 format %{ "TEST $src.hi,$src.hi" %} 12837 opcode(0x85); 12838 ins_encode( OpcP, RegReg_Hi2( src, src ) ); 12839 ins_pipe( ialu_cr_reg_reg ); 12840 %} 12841 12842 // Manifest a CmpL result in the normal flags. Only good for LT or GE 12843 // compares. Can be used for LE or GT compares by reversing arguments. 12844 // NOT GOOD FOR EQ/NE tests. 12845 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{ 12846 match( Set flags (CmpL src1 src2 )); 12847 effect( TEMP tmp ); 12848 ins_cost(300); 12849 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" 12850 "MOV $tmp,$src1.hi\n\t" 12851 "SBB $tmp,$src2.hi\t! Compute flags for long compare" %} 12852 ins_encode( long_cmp_flags2( src1, src2, tmp ) ); 12853 ins_pipe( ialu_cr_reg_reg ); 12854 %} 12855 12856 // Long compares reg < zero/req OR reg >= zero/req. 12857 // Just a wrapper for a normal branch, plus the predicate test. 12858 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{ 12859 match(If cmp flags); 12860 effect(USE labl); 12861 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12862 expand %{ 12863 jmpCon(cmp,flags,labl); // JLT or JGE... 12864 %} 12865 %} 12866 12867 //====== 12868 // Manifest a CmpUL result in the normal flags. Only good for LT or GE 12869 // compares. Can be used for LE or GT compares by reversing arguments. 12870 // NOT GOOD FOR EQ/NE tests. 
12871 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{ 12872 match(Set flags (CmpUL src zero)); 12873 ins_cost(100); 12874 format %{ "TEST $src.hi,$src.hi" %} 12875 opcode(0x85); 12876 ins_encode(OpcP, RegReg_Hi2(src, src)); 12877 ins_pipe(ialu_cr_reg_reg); 12878 %} 12879 12880 // Manifest a CmpUL result in the normal flags. Only good for LT or GE 12881 // compares. Can be used for LE or GT compares by reversing arguments. 12882 // NOT GOOD FOR EQ/NE tests. 12883 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{ 12884 match(Set flags (CmpUL src1 src2)); 12885 effect(TEMP tmp); 12886 ins_cost(300); 12887 format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t" 12888 "MOV $tmp,$src1.hi\n\t" 12889 "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %} 12890 ins_encode(long_cmp_flags2(src1, src2, tmp)); 12891 ins_pipe(ialu_cr_reg_reg); 12892 %} 12893 12894 // Unsigned long compares reg < zero/req OR reg >= zero/req. 12895 // Just a wrapper for a normal branch, plus the predicate test. 12896 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{ 12897 match(If cmp flags); 12898 effect(USE labl); 12899 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); 12900 expand %{ 12901 jmpCon(cmp, flags, labl); // JLT or JGE... 12902 %} 12903 %} 12904 12905 // Compare 2 longs and CMOVE longs. 
12906 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{ 12907 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 12908 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12909 ins_cost(400); 12910 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12911 "CMOV$cmp $dst.hi,$src.hi" %} 12912 opcode(0x0F,0x40); 12913 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 12914 ins_pipe( pipe_cmov_reg_long ); 12915 %} 12916 12917 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{ 12918 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 12919 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12920 ins_cost(500); 12921 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12922 "CMOV$cmp $dst.hi,$src.hi" %} 12923 opcode(0x0F,0x40); 12924 ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark ); 12925 ins_pipe( pipe_cmov_reg_long ); 12926 %} 12927 12928 instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{ 12929 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 12930 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12931 ins_cost(400); 12932 expand %{ 12933 cmovLL_reg_LTGE(cmp, flags, dst, src); 12934 %} 12935 %} 12936 12937 instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{ 12938 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 12939 predicate(VM_Version::supports_cmov() && ( 
_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12940 ins_cost(500); 12941 expand %{ 12942 cmovLL_mem_LTGE(cmp, flags, dst, src); 12943 %} 12944 %} 12945 12946 // Compare 2 longs and CMOVE ints. 12947 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{ 12948 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12949 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 12950 ins_cost(200); 12951 format %{ "CMOV$cmp $dst,$src" %} 12952 opcode(0x0F,0x40); 12953 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12954 ins_pipe( pipe_cmov_reg ); 12955 %} 12956 12957 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{ 12958 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12959 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 12960 ins_cost(250); 12961 format %{ "CMOV$cmp $dst,$src" %} 12962 opcode(0x0F,0x40); 12963 ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark ); 12964 ins_pipe( pipe_cmov_mem ); 12965 %} 12966 12967 instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{ 12968 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12969 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 12970 ins_cost(200); 12971 expand %{ 12972 cmovII_reg_LTGE(cmp, flags, dst, src); 12973 %} 12974 %} 12975 12976 instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{ 12977 predicate(VM_Version::supports_cmov() && 
( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12978 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 12979 ins_cost(250); 12980 expand %{ 12981 cmovII_mem_LTGE(cmp, flags, dst, src); 12982 %} 12983 %} 12984 12985 // Compare 2 longs and CMOVE ptrs. 12986 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{ 12987 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12988 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); 12989 ins_cost(200); 12990 format %{ "CMOV$cmp $dst,$src" %} 12991 opcode(0x0F,0x40); 12992 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12993 ins_pipe( pipe_cmov_reg ); 12994 %} 12995 12996 // Compare 2 unsigned longs and CMOVE ptrs. 12997 instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{ 12998 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12999 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); 13000 ins_cost(200); 13001 expand %{ 13002 cmovPP_reg_LTGE(cmp,flags,dst,src); 13003 %} 13004 %} 13005 13006 // Compare 2 longs and CMOVE doubles 13007 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{ 13008 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 13009 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 13010 ins_cost(200); 13011 expand %{ 13012 fcmovDPR_regS(cmp,flags,dst,src); 13013 %} 13014 %} 13015 13016 // Compare 2 longs and CMOVE doubles 13017 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD 
dst, regD src) %{ 13018 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 13019 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 13020 ins_cost(200); 13021 expand %{ 13022 fcmovD_regS(cmp,flags,dst,src); 13023 %} 13024 %} 13025 13026 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{ 13027 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 13028 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 13029 ins_cost(200); 13030 expand %{ 13031 fcmovFPR_regS(cmp,flags,dst,src); 13032 %} 13033 %} 13034 13035 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{ 13036 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 13037 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 13038 ins_cost(200); 13039 expand %{ 13040 fcmovF_regS(cmp,flags,dst,src); 13041 %} 13042 %} 13043 13044 //====== 13045 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. 13046 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{ 13047 match( Set flags (CmpL src zero )); 13048 effect(TEMP tmp); 13049 ins_cost(200); 13050 format %{ "MOV $tmp,$src.lo\n\t" 13051 "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %} 13052 ins_encode( long_cmp_flags0( src, tmp ) ); 13053 ins_pipe( ialu_reg_reg_long ); 13054 %} 13055 13056 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. 13057 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{ 13058 match( Set flags (CmpL src1 src2 )); 13059 ins_cost(200+300); 13060 format %{ "CMP $src1.lo,$src2.lo\t! 
Long compare; set flags for low bits\n\t" 13061 "JNE,s skip\n\t" 13062 "CMP $src1.hi,$src2.hi\n\t" 13063 "skip:\t" %} 13064 ins_encode( long_cmp_flags1( src1, src2 ) ); 13065 ins_pipe( ialu_cr_reg_reg ); 13066 %} 13067 13068 // Long compare reg == zero/reg OR reg != zero/reg 13069 // Just a wrapper for a normal branch, plus the predicate test. 13070 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{ 13071 match(If cmp flags); 13072 effect(USE labl); 13073 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 13074 expand %{ 13075 jmpCon(cmp,flags,labl); // JEQ or JNE... 13076 %} 13077 %} 13078 13079 //====== 13080 // Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares. 13081 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{ 13082 match(Set flags (CmpUL src zero)); 13083 effect(TEMP tmp); 13084 ins_cost(200); 13085 format %{ "MOV $tmp,$src.lo\n\t" 13086 "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %} 13087 ins_encode(long_cmp_flags0(src, tmp)); 13088 ins_pipe(ialu_reg_reg_long); 13089 %} 13090 13091 // Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares. 13092 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{ 13093 match(Set flags (CmpUL src1 src2)); 13094 ins_cost(200+300); 13095 format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t" 13096 "JNE,s skip\n\t" 13097 "CMP $src1.hi,$src2.hi\n\t" 13098 "skip:\t" %} 13099 ins_encode(long_cmp_flags1(src1, src2)); 13100 ins_pipe(ialu_cr_reg_reg); 13101 %} 13102 13103 // Unsigned long compare reg == zero/reg OR reg != zero/reg 13104 // Just a wrapper for a normal branch, plus the predicate test. 
13105 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{ 13106 match(If cmp flags); 13107 effect(USE labl); 13108 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne); 13109 expand %{ 13110 jmpCon(cmp, flags, labl); // JEQ or JNE... 13111 %} 13112 %} 13113 13114 // Compare 2 longs and CMOVE longs. 13115 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{ 13116 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 13117 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13118 ins_cost(400); 13119 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13120 "CMOV$cmp $dst.hi,$src.hi" %} 13121 opcode(0x0F,0x40); 13122 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 13123 ins_pipe( pipe_cmov_reg_long ); 13124 %} 13125 13126 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{ 13127 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 13128 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13129 ins_cost(500); 13130 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13131 "CMOV$cmp $dst.hi,$src.hi" %} 13132 opcode(0x0F,0x40); 13133 ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark ); 13134 ins_pipe( pipe_cmov_reg_long ); 13135 %} 13136 13137 // Compare 2 longs and CMOVE ints. 
13138 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{ 13139 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13140 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 13141 ins_cost(200); 13142 format %{ "CMOV$cmp $dst,$src" %} 13143 opcode(0x0F,0x40); 13144 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 13145 ins_pipe( pipe_cmov_reg ); 13146 %} 13147 13148 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{ 13149 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13150 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 13151 ins_cost(250); 13152 format %{ "CMOV$cmp $dst,$src" %} 13153 opcode(0x0F,0x40); 13154 ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark ); 13155 ins_pipe( pipe_cmov_mem ); 13156 %} 13157 13158 instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{ 13159 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13160 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 13161 ins_cost(200); 13162 expand %{ 13163 cmovII_reg_EQNE(cmp, flags, dst, src); 13164 %} 13165 %} 13166 13167 instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{ 13168 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13169 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 13170 ins_cost(250); 13171 expand %{ 13172 cmovII_mem_EQNE(cmp, 
flags, dst, src); 13173 %} 13174 %} 13175 13176 // Compare 2 longs and CMOVE ptrs. 13177 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{ 13178 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13179 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); 13180 ins_cost(200); 13181 format %{ "CMOV$cmp $dst,$src" %} 13182 opcode(0x0F,0x40); 13183 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 13184 ins_pipe( pipe_cmov_reg ); 13185 %} 13186 13187 // Compare 2 unsigned longs and CMOVE ptrs. 13188 instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{ 13189 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13190 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); 13191 ins_cost(200); 13192 expand %{ 13193 cmovPP_reg_EQNE(cmp,flags,dst,src); 13194 %} 13195 %} 13196 13197 // Compare 2 longs and CMOVE doubles 13198 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{ 13199 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13200 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 13201 ins_cost(200); 13202 expand %{ 13203 fcmovDPR_regS(cmp,flags,dst,src); 13204 %} 13205 %} 13206 13207 // Compare 2 longs and CMOVE doubles 13208 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{ 13209 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13210 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 13211 ins_cost(200); 
13212 expand %{ 13213 fcmovD_regS(cmp,flags,dst,src); 13214 %} 13215 %} 13216 13217 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{ 13218 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13219 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 13220 ins_cost(200); 13221 expand %{ 13222 fcmovFPR_regS(cmp,flags,dst,src); 13223 %} 13224 %} 13225 13226 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{ 13227 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13228 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 13229 ins_cost(200); 13230 expand %{ 13231 fcmovF_regS(cmp,flags,dst,src); 13232 %} 13233 %} 13234 13235 //====== 13236 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. 13237 // Same as cmpL_reg_flags_LEGT except must negate src 13238 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{ 13239 match( Set flags (CmpL src zero )); 13240 effect( TEMP tmp ); 13241 ins_cost(300); 13242 format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t" 13243 "CMP $tmp,$src.lo\n\t" 13244 "SBB $tmp,$src.hi\n\t" %} 13245 ins_encode( long_cmp_flags3(src, tmp) ); 13246 ins_pipe( ialu_reg_reg_long ); 13247 %} 13248 13249 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. 13250 // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands 13251 // requires a commuted test to get the same result. 13252 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{ 13253 match( Set flags (CmpL src1 src2 )); 13254 effect( TEMP tmp ); 13255 ins_cost(300); 13256 format %{ "CMP $src2.lo,$src1.lo\t! 
Long compare, swapped operands, use with commuted test\n\t" 13257 "MOV $tmp,$src2.hi\n\t" 13258 "SBB $tmp,$src1.hi\t! Compute flags for long compare" %} 13259 ins_encode( long_cmp_flags2( src2, src1, tmp ) ); 13260 ins_pipe( ialu_cr_reg_reg ); 13261 %} 13262 13263 // Long compares reg < zero/req OR reg >= zero/req. 13264 // Just a wrapper for a normal branch, plus the predicate test 13265 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{ 13266 match(If cmp flags); 13267 effect(USE labl); 13268 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ); 13269 ins_cost(300); 13270 expand %{ 13271 jmpCon(cmp,flags,labl); // JGT or JLE... 13272 %} 13273 %} 13274 13275 //====== 13276 // Manifest a CmpUL result in the normal flags. Only good for LE or GT compares. 13277 // Same as cmpUL_reg_flags_LEGT except must negate src 13278 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{ 13279 match(Set flags (CmpUL src zero)); 13280 effect(TEMP tmp); 13281 ins_cost(300); 13282 format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t" 13283 "CMP $tmp,$src.lo\n\t" 13284 "SBB $tmp,$src.hi\n\t" %} 13285 ins_encode(long_cmp_flags3(src, tmp)); 13286 ins_pipe(ialu_reg_reg_long); 13287 %} 13288 13289 // Manifest a CmpUL result in the normal flags. Only good for LE or GT compares. 13290 // Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands 13291 // requires a commuted test to get the same result. 13292 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{ 13293 match(Set flags (CmpUL src1 src2)); 13294 effect(TEMP tmp); 13295 ins_cost(300); 13296 format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t" 13297 "MOV $tmp,$src2.hi\n\t" 13298 "SBB $tmp,$src1.hi\t! 
Compute flags for unsigned long compare" %} 13299 ins_encode(long_cmp_flags2( src2, src1, tmp)); 13300 ins_pipe(ialu_cr_reg_reg); 13301 %} 13302 13303 // Unsigned long compares reg < zero/req OR reg >= zero/req. 13304 // Just a wrapper for a normal branch, plus the predicate test 13305 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{ 13306 match(If cmp flags); 13307 effect(USE labl); 13308 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le); 13309 ins_cost(300); 13310 expand %{ 13311 jmpCon(cmp, flags, labl); // JGT or JLE... 13312 %} 13313 %} 13314 13315 // Compare 2 longs and CMOVE longs. 13316 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{ 13317 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 13318 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 13319 ins_cost(400); 13320 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13321 "CMOV$cmp $dst.hi,$src.hi" %} 13322 opcode(0x0F,0x40); 13323 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 13324 ins_pipe( pipe_cmov_reg_long ); 13325 %} 13326 13327 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{ 13328 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 13329 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 13330 ins_cost(500); 13331 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13332 "CMOV$cmp $dst.hi,$src.hi+4" %} 13333 opcode(0x0F,0x40); 13334 ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark ); 13335 ins_pipe( pipe_cmov_reg_long 
); 13336 %} 13337 13338 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{ 13339 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 13340 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 13341 ins_cost(400); 13342 expand %{ 13343 cmovLL_reg_LEGT(cmp, flags, dst, src); 13344 %} 13345 %} 13346 13347 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{ 13348 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 13349 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 13350 ins_cost(500); 13351 expand %{ 13352 cmovLL_mem_LEGT(cmp, flags, dst, src); 13353 %} 13354 %} 13355 13356 // Compare 2 longs and CMOVE ints. 
// Long compare sets the flags; a single 32-bit CMOVcc then selects the
// int result.  GT/LE Bool tests only; requires CMOV hardware support.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);   // CMOVcc = 0F 4x /r
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source variant of cmovII_reg_LEGT.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags variants: delegate to the rules above.
instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LEGT(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// x87 (FPR) form, used only when SSE2 is not in use for doubles.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// XMM (SSE2) form.
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Float CMove, x87 (FPR) form: UseSSE==0 only.
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


// Float CMove, XMM (SSE) form: UseSSE>=1.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // Per the format: EAX is preloaded with (oop)-1 ahead of the E8 call.
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that carries no FPU state: no FFree sequence in the encoding.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
// Don't use ebp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall which has reset ebp to the caller state.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
  match(TailCall jump_target method_ptr);
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // Pops the return address into EDX (a dummy) before the indirect jump
  // so the target does not see it on the stack.
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Forward exception.
instruct ForwardExceptionjmp()
%{
  match(ForwardException);

  format %{ "JMP forward_exception_stub" %}
  ins_encode %{
    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Legacy (non-lightweight) fast-path lock: fetches the current thread,
// then emits the fast_lock stub sequence; kills $box, $tmp and $scr.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, $thread$$Register, nullptr);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Lightweight-locking variants (LockingMode == LM_LIGHTWEIGHT); both
// fetch the current thread first and require EAX as a temp.
instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object eax_reg));
  effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// MaskAll: materialize a vector mask in an EVEX kReg from a scalar src,
// for vector lengths <= 32 lanes.
// NOTE(review): the format text says "LE32" while the instruct is named
// LT32 (the predicate is vector_length <= 32) — the label looks stale;
// confirm before touching since it is debug-output text.
instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// > 32 lanes needs a second mask register as scratch.
instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// Same as above for an int source.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Safepoint Instruction
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ set_inst_mark();
    __ relocate(relocInfo::poll_type);
    __ clear_inst_mark();
    address pre_pc = __ pc();
    // TEST r/m32,r32 encodes as one opcode byte 0x85; the guarantee below
    // checks that byte was emitted at pre_pc.
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();
    // NOTE(review): post_pc is computed but not checked here — confirm
    // whether a length check (post_pc - pre_pc == 2) was intended.
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// When a loadI (instruction 0, the root) reloads exactly the value the
// immediately-preceding storeI (instruction 1) wrote — same register
// (1.src == 0.dst) and same memory slot (1.mem == 0.mem) — the pair is
// replaced by the single storeI: the reload is redundant.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.