//
// Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

// NOTE(doc): the 4th column is the hardware encoding used in opcode bytes
// (EAX=0, ECX=1, EDX=2, EBX=3, ESP=4, EBP=5, ESI=6, EDI=7).
reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
// FPR0 has no backing VMReg: it is never visible at a safepoint (see below).
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
// Each FPR is modeled as an L/H pair of 32-bit halves so a double occupies
// two adjacent slots (the H half is the VMReg's ->next()).
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI,   ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
reg_class ebpd_reg( EBP,EDI );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
// (the integer reg_defs above place the pair's high half at encoding+2,
// e.g. the EDX:EAX and EBX:ECX pairings).
#define HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
#define HIGH_FROM_LOW_ENC(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// No dynamic register-mask setup is needed on x86_32.
void reg_mask_init() {}

// Note: 'double' and 'long long' have 32-bits alignment on x86.
//
// Round 'adr' down to a 16-byte boundary and store the 128-bit value
// {lo, hi} there; returns the aligned address.  The caller must provide a
// buffer with at least 16 bytes of slack so the aligned slot fits.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool pointer is a 16-byte-aligned slot inside fp_signmask_pool
// holding the sign-mask / sign-flip constant used by AbsF/AbsD/NegF/NegD.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Byte size of the instructions (fldcw and/or vzeroupper) emitted just
// before a call; used below so ret_addr_offset/padding stay in sync with
// what is actually emitted.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}

// Size of the FFree_Float_Stack_All sequence; set when it is first emitted
// (starts as -1 so the assert below catches use-before-emit).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;                       // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;                       // skip MOV instruction
  current_offset += 1;                       // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M byte: f1 = mod (2 bits), f2 = reg (3 bits), f3 = r/m (3 bits).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with the condition code OR-ed into its low bits.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}
// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
                    int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
                    int format) {
#ifdef ASSERT
  // Embedded oops must be valid (or NULL, or the non-oop sentinel word).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + [ESP+disp] addressing (ESP as base always needs a SIB byte),
// choosing the 8-bit or 32-bit displacement form by the size of disp.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModR/M (+ optional SIB, + displacement) bytes for a reg,mem
// operand pair.  index == 0x4 means "no index"; base == -1 means an
// absolute 32-bit address.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {    // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else {    // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else {    // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {    // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}

// Emit a register-to-register move (MOV r32, r/m32, opcode 0x8B);
// a self-move is elided entirely.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}
// Fix up EFLAGS after a comiss/ucomiss compare so that an unordered (NaN)
// result reads as 'less than' (CF set, ZF/PF cleared); ordered results are
// left untouched (the jccb skips the fixup when PF is clear).
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in dst from the flags set by a
// comiss/ucomiss: -1 for 'below' or unordered (NaN), 0 for equal, 1 for above.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog for -XX:+PrintOptoAssembly; must mirror the
// instruction sequence produced by MachPrologNode::emit below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  // verified_entry emits the full prolog (stack bang, EBP save, frame setup,
  // optional 24-bit FPU control word load).
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog; must mirror MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL rsp, poll_offset[thread] \n\t"
              "JA #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize (imm32 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize (sign-extended imm8 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    MacroAssembler masm(&cbuf);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    // During scratch emission (size measurement) no real stub is created;
    // the dummy label keeps the encoding size identical.
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//============================================================================= 734 735 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack }; 736 static enum RC rc_class( OptoReg::Name reg ) { 737 738 if( !OptoReg::is_valid(reg) ) return rc_bad; 739 if (OptoReg::is_stack(reg)) return rc_stack; 740 741 VMReg r = OptoReg::as_VMReg(reg); 742 if (r->is_Register()) return rc_int; 743 if (r->is_FloatRegister()) { 744 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 745 return rc_float; 746 } 747 if (r->is_KRegister()) return rc_kreg; 748 assert(r->is_XMMRegister(), "must be"); 749 return rc_xmm; 750 } 751 752 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, 753 int opcode, const char *op_str, int size, outputStream* st ) { 754 if( cbuf ) { 755 emit_opcode (*cbuf, opcode ); 756 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); 757 #ifndef PRODUCT 758 } else if( !do_size ) { 759 if( size != 0 ) st->print("\n\t"); 760 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 761 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 762 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 763 } else { // FLD, FST, PUSH, POP 764 st->print("%s [ESP + #%d]",op_str,offset); 765 } 766 #endif 767 } 768 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 769 return size+3+offset_size; 770 } 771 772 // Helper for XMM registers. Extra opcode bits, limited syntax. 
// Spill between an XMM register and an [ESP+offset] stack slot (movdbl for an
// adjacent register pair, movflt for a single slot).  Like impl_helper this
// either emits, prints, or just sizes; the returned size accounts for the
// EVEX/VEX prefix difference and for EVEX compressed disp8 encoding.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // With AVX-512 the displacement may compress to one byte (disp8*N);
  // otherwise the usual disp8-if-it-fits rule applies.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy (movdbl for a pair, movflt for a single register).
// Returned size covers the SSE/VEX/EVEX prefix variants; MOVAPS under pure
// SSE (UseAVX == 0) is the only 3-byte encoding.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// 32-bit GPR -> XMM copy via MOVD (movdl).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// 32-bit XMM -> GPR copy via MOVD (movdl).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// GPR-to-GPR copy: MOV r32,r/m32 (opcode 0x8B), 2 bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 float register to [ESP+offset].  If the source is not already
// FPR1 (top of stack) it is first FLD'd and then stored-with-pop (FSTP);
// otherwise a plain FST leaves the stack unchanged.  st_op selects the ModRM
// reg field for the store form (EBX_num -> /3 store&pop, EDX_num -> /2 store).
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st);

void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
                      int stack_offset, int reg, uint ireg, outputStream* st);

// Copy a vector value from one stack slot to another.  Small vectors (VecS,
// VecD) go word-by-word through push/pop; wider vectors bounce through xmm0,
// which is saved to and restored from scratch space just below ESP.
static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    switch (ireg) {
    case Op_VecS: // one 32-bit word
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD: // two 32-bit words
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX: // 128 bits via xmm0, saved below ESP
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY: // 256 bits
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ: // 512 bits
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      // NOTE(review): debug string says "popq" but the emitted instruction is
      // popl on this 32-bit port — consider aligning the text.
      st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushl [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu xmm0, [rsp + #%d]\n\t"
                "movdqu [rsp + #%d], xmm0\n\t"
                "movdqu xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      // NOTE(review): debug string says "vmovdqu" while the emitted
      // instruction is evmovdquq — consider aligning the text.
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}

// Bottom-up spill copy: move a (possibly two-slot) value between the locations
// the register allocator assigned.  With cbuf it emits code; with cbuf == NULL
// and !do_size it prints assembly; in the scalar cases it returns the
// accumulated byte size.  Vector and opmask cases return 0 (size is computed
// by MachNode::size() instead).
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size; // Self copy, no move

  // Vector copies are dispatched to the shared x86 helpers (vectmask values
  // fall through to the kreg cases below).
  if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }

  // --------------------------------------
  // Check for mem-mem move. push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      // Move the high half first so the low-half copy does not clobber it.
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( src_first_rc == rc_stack && dst_first_rc == rc_int )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode (*cbuf, 0xD9 ); // FLD ST(i)
        emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
        emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode (*cbuf, 0xDD ); // FST ST(i)
        emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
      emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // AVX-512 opmask specific spilling.
  if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    int offset = ra_->reg2offset(src_first);
    __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    int offset = ra_->reg2offset(dst_first);
    __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
    return 0;
  }

  // By now the first word must have been handled by one of the cases above.
  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size; // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Materialize the address of this node's stack slot: LEA reg,[ESP+offset].
// Offsets of 128 or more need the disp32 form; smaller ones fit in disp8.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// Must agree byte-for-byte with the encodings chosen in emit() above:
// 7 bytes with a disp32, 4 bytes with a disp8.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Unverified entry point: inline-cache check.  Compares the expected klass
// (in EAX) against the receiver's klass and jumps to the IC-miss stub on
// mismatch.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// Fixed size, checked by the assert in emit() above (one NOP is dropped
// when OptoBreakpoint leaves room for an int3).
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

// Vector calling convention not supported.
const bool Matcher::supports_vector_calling_convention() {
  return false;
}

OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  Unimplemented();
  return OptoRegPair(0, 0);
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On 86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

// Register-pressure limits for the allocator; the -1 sentinel means the
// INTPRESSURE/FLOATPRESSURE flags were left at their default.
uint Matcher::int_pressure_limit()
{
  return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
}

uint Matcher::float_pressure_limit()
{
  return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL (no divmodL on this 32-bit port)
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL (no divmodL on this 32-bit port)
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// Recognizes an AndL with a constant mask that clears the upper word,
// or a ConL whose upper word is zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams. Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction. Operands specify their base encoding interface with the
// interface keyword.
There are currently supported four interfaces, 1502 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an 1503 // operand to generate a function which returns its register number when 1504 // queried. CONST_INTER causes an operand to generate a function which 1505 // returns the value of the constant when queried. MEMORY_INTER causes an 1506 // operand to generate four functions which return the Base Register, the 1507 // Index Register, the Scale Value, and the Offset Value of the operand when 1508 // queried. COND_INTER causes an operand to generate six functions which 1509 // return the encoding code (ie - encoding bits for the instruction) 1510 // associated with each basic boolean condition for a conditional instruction. 1511 // Instructions specify two basic values for encoding. They use the 1512 // ins_encode keyword to specify their encoding class (which must be one of 1513 // the class names specified in the encoding block), and they use the 1514 // opcode keyword to specify, in order, their primary, secondary, and 1515 // tertiary opcode. Only the opcode sections which a particular instruction 1516 // needs for encoding need to be specified. 1517 encode %{ 1518 // Build emit functions for each basic byte or larger field in the intel 1519 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1520 // code in the enc_class source block. Emit functions will live in the 1521 // main source block for now. 
// In future, we can generalize this by
// adding a syntax that specifies the sizes of fields in an order,
// so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix (selects 16-bit operand size)
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // Register-direct mod/rm byte (mod == 0x3) for a reg,reg form
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Explicit opcode constant followed by a register-direct mod/rm byte
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32,0 -- materialize an immediate zero in 'dst'
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32   ( cbuf, 0x0  );             // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                        min_int
    //         reg:  divisor                         -1
    //
    // output: rax,: quotient  (= rax, idiv reg)     min_int
    //         rdx:  remainder (= rax, irem reg)     0
    //
    // Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and
    // set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02); }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  // Emit an 8-bit immediate when it fits, otherwise a full 32-bit immediate
  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Low word of a long immediate: opcode + r/m (secondary) + 8- or 32-bit constant
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // High word of a long immediate: opcode + r/m (tertiary, on the high register
  // of the pair) + 8- or 32-bit constant
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // Fold the register number into the secondary opcode byte (e.g. BSWAP r32)
  enc_class OpcSReg (rRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // 64-bit byte swap: BSWAP each 32-bit half, then exchange the halves
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW_ENC(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  // r/m byte with the secondary opcode in the reg/opcode field
  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  // CMOVcc: primary opcode, then condition code folded into the secondary byte
  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // FCMOVcc for x87 double registers: two-byte opcode built from 0xDA00 base
  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  // Slow-path subtype check (result in EDI, ECX killed); see
  // MacroAssembler::check_klass_subtype_slow_path
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    // Record (and on later uses, re-check) the emitted size of this sequence.
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  // Optionally verify the x87 stack depth when returning from a leaf call
  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        // Move a float result from the x87 TOS to xmm0 via the stack.
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        // Move a double result from the x87 TOS to xmm0 via the stack.
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}

  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.insts_size());
    if (ra_->C->in_24_bit_fp_mode()) {
      MacroAssembler _masm(&cbuf);
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    MacroAssembler _masm(&cbuf);
    __ vzeroupper();
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}

  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    $$$emit8$primary;

    if (!_method) {
      // Runtime (non-Java) target: plain runtime-call relocation.
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_IMM32);
      __ post_call_nop();
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     rspec, RELOC_DISP32);
      __ post_call_nop();
      address mark = cbuf.insts_mark();
      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
        // Calls of the same statically bound method can share
        // a stub to the interpreter.
        cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
      } else {
        // Emit stubs for static call.
        address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
        if (stub == NULL) {
          ciEnv::current()->record_failure("CodeCache is full");
          return;
        }
      }
    }
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
    __ post_call_nop();
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);                        // Displacement
    __ post_call_nop();
  %}

  // Following encoding is no longer used, but may be restored if calling
  // convention changes significantly.
  // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  // enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
  //   // int ic_reg     = Matcher::inline_cache_reg();
  //   // int ic_encode  = Matcher::_regEncode[ic_reg];
  //   // int imo_reg    = Matcher::interpreter_method_reg();
  //   // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //   // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
  //   // // so we load it immediately before the call
  //   // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
  //   // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
  //
  //   // xor rbp,ebp
  //   emit_opcode(cbuf, 0x33);
  //   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //   // CALL to interpreter.
  //   cbuf.set_insts_mark();
  //   $$$emit8$primary;
  //   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
  //                  runtime_call_Relocation::spec(), RELOC_IMM32 );
  // %}

  // Shift/rotate by 8-bit immediate: opcode + r/m (secondary) + imm8
  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load low 32 bits of a long immediate; uses XOR when the half is zero
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load high 32 bits of a long immediate; uses XOR when the half is zero
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Copy the low word of a long pair into an int register
  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
  %}

  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
  %}

  // r/m byte pairing an int register with the high half of a long pair
  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class
Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  // Emit the LOCK prefix byte
  enc_class lock_prefix( ) %{
    emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHGB [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB0);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // 16-bit mode
    emit_opcode(cbuf, 0x66);

    // CMPXCHGW [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // res = 1 if ZF is set (JNE not taken), else 0 -- flags already set by
  // a preceding instruction (e.g. cmpxchg)
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();     // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base,
                  index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Double-word shift of a long by a 1..31 constant: SHLD/SHRD (tertiary)
  // across the pair, then shift (primary/secondary) on the remaining half.
  // Tertiary 0xA4 is SHLD; otherwise the operand order is reversed (SHRD).
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by 32..63: move hi to lo, shift lo,
  // then sign-fill hi by shifting it right by 31
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Shift of a long by 32..63 with zero fill of the vacated half.
  // Secondary 0x5 is SHR (lo gets hi's old value); otherwise reversed (SHL).
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  // LEA dst,[src0+src1] with no index register and no scale
  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // dst = min(dst,src): compare, then branch around the move when dst < src
  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // dst = max(dst,src): compare, then branch around the move when dst > src
  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;          // Store & pop
      emit_opcode( cbuf, 0xD9 );   // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();         // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free conditional add: p += (p < q) ? y : 0, using SBB to
  // materialize an all-ones/all-zeros mask in tmp
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // 64-bit left shift by a variable count in ECX; handles counts >= 32 by
  // moving lo into hi and clearing lo before the SHLD/SHL pair
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    // SHL $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // 64-bit logical right shift by a variable count in ECX; handles counts
  // >= 32 by moving hi into lo and clearing hi before the SHRD/SHR pair
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
  %}

  // 64-bit arithmetic right shift by a variable count in ECX; for counts
  // >= 32, lo gets hi and hi is sign-filled with SAR hi,31
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
    emit_d8(cbuf, 0x1F );
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );         // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1)
  %}

  // Multiply TOS by the first strictfp scaling constant (extended-precision
  // load from a stub-provided address, then FMULP into dst)
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );         // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );         // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Multiply TOS by the second strictfp scaling constant (inverse bias)
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );         // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );         // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );         // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );         // FLD ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;                    // FST (no pop) when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );       // FLD ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                      // FSTP: pop the copy we just pushed
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;                    // FST (no pop) when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );       // FLD ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                      // FSTP: pop the copy we just pushed
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );       // FLD ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );    // FST<P> ST(i)
  %}


  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Spill two XMM doubles and load both onto the x87 stack (src1 below src0)
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Spill two XMM floats and load both onto the x87 stack (src1 below src0)
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the x87 TOS double through the stack slot into an XMM register
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Pop the x87 TOS float through the stack slot into an XMM register;
  // d8 is the number of stack bytes to release
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
2502 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2503 __ fld_d(Address(rsp, 0)); 2504 %} 2505 2506 enc_class push_stack_temp_qword() %{ 2507 MacroAssembler _masm(&cbuf); 2508 __ subptr(rsp, 8); 2509 %} 2510 2511 enc_class pop_stack_temp_qword() %{ 2512 MacroAssembler _masm(&cbuf); 2513 __ addptr(rsp, 8); 2514 %} 2515 2516 enc_class push_xmm_to_fpr1(regD src) %{ 2517 MacroAssembler _masm(&cbuf); 2518 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2519 __ fld_d(Address(rsp, 0)); 2520 %} 2521 2522 enc_class Push_Result_Mod_DPR( regDPR src) %{ 2523 if ($src$$reg != FPR1L_enc) { 2524 // fincstp 2525 emit_opcode (cbuf, 0xD9); 2526 emit_opcode (cbuf, 0xF7); 2527 // FXCH FPR1 with src 2528 emit_opcode(cbuf, 0xD9); 2529 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2530 // fdecstp 2531 emit_opcode (cbuf, 0xD9); 2532 emit_opcode (cbuf, 0xF6); 2533 } 2534 // // following asm replaced with Pop_Reg_F or Pop_Mem_F 2535 // // FSTP FPR$dst$$reg 2536 // emit_opcode( cbuf, 0xDD ); 2537 // emit_d8( cbuf, 0xD8+$dst$$reg ); 2538 %} 2539 2540 enc_class fnstsw_sahf_skip_parity() %{ 2541 // fnstsw ax 2542 emit_opcode( cbuf, 0xDF ); 2543 emit_opcode( cbuf, 0xE0 ); 2544 // sahf 2545 emit_opcode( cbuf, 0x9E ); 2546 // jnp ::skip 2547 emit_opcode( cbuf, 0x7B ); 2548 emit_opcode( cbuf, 0x05 ); 2549 %} 2550 2551 enc_class emitModDPR() %{ 2552 // fprem must be iterative 2553 // :: loop 2554 // fprem 2555 emit_opcode( cbuf, 0xD9 ); 2556 emit_opcode( cbuf, 0xF8 ); 2557 // wait 2558 emit_opcode( cbuf, 0x9b ); 2559 // fnstsw ax 2560 emit_opcode( cbuf, 0xDF ); 2561 emit_opcode( cbuf, 0xE0 ); 2562 // sahf 2563 emit_opcode( cbuf, 0x9E ); 2564 // jp ::loop 2565 emit_opcode( cbuf, 0x0F ); 2566 emit_opcode( cbuf, 0x8A ); 2567 emit_opcode( cbuf, 0xF4 ); 2568 emit_opcode( cbuf, 0xFF ); 2569 emit_opcode( cbuf, 0xFF ); 2570 emit_opcode( cbuf, 0xFF ); 2571 %} 2572 2573 enc_class fpu_flags() %{ 2574 // fnstsw_ax 2575 emit_opcode( cbuf, 0xDF); 2576 emit_opcode( cbuf, 0xE0); 2577 // test ax,0x0400 2578 emit_opcode( cbuf, 
0x66 ); // operand-size prefix for 16-bit immediate 2579 emit_opcode( cbuf, 0xA9 ); 2580 emit_d16 ( cbuf, 0x0400 ); 2581 // // // This sequence works, but stalls for 12-16 cycles on PPro 2582 // // test rax,0x0400 2583 // emit_opcode( cbuf, 0xA9 ); 2584 // emit_d32 ( cbuf, 0x00000400 ); 2585 // 2586 // jz exit (no unordered comparison) 2587 emit_opcode( cbuf, 0x74 ); 2588 emit_d8 ( cbuf, 0x02 ); 2589 // mov ah,1 - treat as LT case (set carry flag) 2590 emit_opcode( cbuf, 0xB4 ); 2591 emit_d8 ( cbuf, 0x01 ); 2592 // sahf 2593 emit_opcode( cbuf, 0x9E); 2594 %} 2595 2596 enc_class cmpF_P6_fixup() %{ 2597 // Fixup the integer flags in case comparison involved a NaN 2598 // 2599 // JNP exit (no unordered comparison, P-flag is set by NaN) 2600 emit_opcode( cbuf, 0x7B ); 2601 emit_d8 ( cbuf, 0x03 ); 2602 // MOV AH,1 - treat as LT case (set carry flag) 2603 emit_opcode( cbuf, 0xB4 ); 2604 emit_d8 ( cbuf, 0x01 ); 2605 // SAHF 2606 emit_opcode( cbuf, 0x9E); 2607 // NOP // target for branch to avoid branch to branch 2608 emit_opcode( cbuf, 0x90); 2609 %} 2610 2611 // fnstsw_ax(); 2612 // sahf(); 2613 // movl(dst, nan_result); 2614 // jcc(Assembler::parity, exit); 2615 // movl(dst, less_result); 2616 // jcc(Assembler::below, exit); 2617 // movl(dst, equal_result); 2618 // jcc(Assembler::equal, exit); 2619 // movl(dst, greater_result); 2620 2621 // less_result = 1; 2622 // greater_result = -1; 2623 // equal_result = 0; 2624 // nan_result = -1; 2625 2626 enc_class CmpF_Result(rRegI dst) %{ 2627 // fnstsw_ax(); 2628 emit_opcode( cbuf, 0xDF); 2629 emit_opcode( cbuf, 0xE0); 2630 // sahf 2631 emit_opcode( cbuf, 0x9E); 2632 // movl(dst, nan_result); 2633 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2634 emit_d32( cbuf, -1 ); 2635 // jcc(Assembler::parity, exit); 2636 emit_opcode( cbuf, 0x7A ); 2637 emit_d8 ( cbuf, 0x13 ); 2638 // movl(dst, less_result); 2639 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2640 emit_d32( cbuf, -1 ); 2641 // jcc(Assembler::below, exit); 2642 emit_opcode( cbuf, 0x72 ); 
2643 emit_d8 ( cbuf, 0x0C ); 2644 // movl(dst, equal_result); 2645 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2646 emit_d32( cbuf, 0 ); 2647 // jcc(Assembler::equal, exit); 2648 emit_opcode( cbuf, 0x74 ); 2649 emit_d8 ( cbuf, 0x05 ); 2650 // movl(dst, greater_result); 2651 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2652 emit_d32( cbuf, 1 ); 2653 %} 2654 2655 2656 // Compare the longs and set flags 2657 // BROKEN! Do Not use as-is 2658 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2659 // CMP $src1.hi,$src2.hi 2660 emit_opcode( cbuf, 0x3B ); 2661 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) ); 2662 // JNE,s done 2663 emit_opcode(cbuf,0x75); 2664 emit_d8(cbuf, 2 ); 2665 // CMP $src1.lo,$src2.lo 2666 emit_opcode( cbuf, 0x3B ); 2667 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2668 // done: 2669 %} 2670 2671 enc_class convert_int_long( regL dst, rRegI src ) %{ 2672 // mov $dst.lo,$src 2673 int dst_encoding = $dst$$reg; 2674 int src_encoding = $src$$reg; 2675 encode_Copy( cbuf, dst_encoding , src_encoding ); 2676 // mov $dst.hi,$src 2677 encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding ); 2678 // sar $dst.hi,31 2679 emit_opcode( cbuf, 0xC1 ); 2680 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) ); 2681 emit_d8(cbuf, 0x1F ); 2682 %} 2683 2684 enc_class convert_long_double( eRegL src ) %{ 2685 // push $src.hi 2686 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg)); 2687 // push $src.lo 2688 emit_opcode(cbuf, 0x50+$src$$reg ); 2689 // fild 64-bits at [SP] 2690 emit_opcode(cbuf,0xdf); 2691 emit_d8(cbuf, 0x6C); 2692 emit_d8(cbuf, 0x24); 2693 emit_d8(cbuf, 0x00); 2694 // pop stack 2695 emit_opcode(cbuf, 0x83); // add SP, #8 2696 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2697 emit_d8(cbuf, 0x8); 2698 %} 2699 2700 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2701 // IMUL EDX:EAX,$src1 2702 emit_opcode( cbuf, 0xF7 ); 2703 emit_rm( cbuf, 0x3, 0x5, 
$src1$$reg ); 2704 // SAR EDX,$cnt-32 2705 int shift_count = ((int)$cnt$$constant) - 32; 2706 if (shift_count > 0) { 2707 emit_opcode(cbuf, 0xC1); 2708 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2709 emit_d8(cbuf, shift_count); 2710 } 2711 %} 2712 2713 // this version doesn't have add sp, 8 2714 enc_class convert_long_double2( eRegL src ) %{ 2715 // push $src.hi 2716 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg)); 2717 // push $src.lo 2718 emit_opcode(cbuf, 0x50+$src$$reg ); 2719 // fild 64-bits at [SP] 2720 emit_opcode(cbuf,0xdf); 2721 emit_d8(cbuf, 0x6C); 2722 emit_d8(cbuf, 0x24); 2723 emit_d8(cbuf, 0x00); 2724 %} 2725 2726 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2727 // Basic idea: long = (long)int * (long)int 2728 // IMUL EDX:EAX, src 2729 emit_opcode( cbuf, 0xF7 ); 2730 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2731 %} 2732 2733 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2734 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2735 // MUL EDX:EAX, src 2736 emit_opcode( cbuf, 0xF7 ); 2737 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2738 %} 2739 2740 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2741 // Basic idea: lo(result) = lo(x_lo * y_lo) 2742 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2743 // MOV $tmp,$src.lo 2744 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2745 // IMUL $tmp,EDX 2746 emit_opcode( cbuf, 0x0F ); 2747 emit_opcode( cbuf, 0xAF ); 2748 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2749 // MOV EDX,$src.hi 2750 encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) ); 2751 // IMUL EDX,EAX 2752 emit_opcode( cbuf, 0x0F ); 2753 emit_opcode( cbuf, 0xAF ); 2754 emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg ); 2755 // ADD $tmp,EDX 2756 emit_opcode( cbuf, 0x03 ); 2757 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2758 // MUL EDX:EAX,$src.lo 2759 emit_opcode( cbuf, 0xF7 ); 2760 emit_rm( cbuf, 0x3, 0x4, 
$src$$reg ); 2761 // ADD EDX,ESI 2762 emit_opcode( cbuf, 0x03 ); 2763 emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg ); 2764 %} 2765 2766 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2767 // Basic idea: lo(result) = lo(src * y_lo) 2768 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2769 // IMUL $tmp,EDX,$src 2770 emit_opcode( cbuf, 0x6B ); 2771 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2772 emit_d8( cbuf, (int)$src$$constant ); 2773 // MOV EDX,$src 2774 emit_opcode(cbuf, 0xB8 + EDX_enc); 2775 emit_d32( cbuf, (int)$src$$constant ); 2776 // MUL EDX:EAX,EDX 2777 emit_opcode( cbuf, 0xF7 ); 2778 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2779 // ADD EDX,ESI 2780 emit_opcode( cbuf, 0x03 ); 2781 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2782 %} 2783 2784 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2785 // PUSH src1.hi 2786 emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) ); 2787 // PUSH src1.lo 2788 emit_opcode(cbuf, 0x50+$src1$$reg ); 2789 // PUSH src2.hi 2790 emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) ); 2791 // PUSH src2.lo 2792 emit_opcode(cbuf, 0x50+$src2$$reg ); 2793 // CALL directly to the runtime 2794 MacroAssembler _masm(&cbuf); 2795 cbuf.set_insts_mark(); 2796 emit_opcode(cbuf,0xE8); // Call into runtime 2797 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2798 __ post_call_nop(); 2799 // Restore stack 2800 emit_opcode(cbuf, 0x83); // add SP, #framesize 2801 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2802 emit_d8(cbuf, 4*4); 2803 %} 2804 2805 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2806 // PUSH src1.hi 2807 emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) ); 2808 // PUSH src1.lo 2809 emit_opcode(cbuf, 0x50+$src1$$reg ); 2810 // PUSH src2.hi 2811 emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) ); 2812 // PUSH src2.lo 2813 emit_opcode(cbuf, 0x50+$src2$$reg ); 2814 // CALL directly to the 
runtime 2815 MacroAssembler _masm(&cbuf); 2816 cbuf.set_insts_mark(); 2817 emit_opcode(cbuf,0xE8); // Call into runtime 2818 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2819 __ post_call_nop(); 2820 // Restore stack 2821 emit_opcode(cbuf, 0x83); // add SP, #framesize 2822 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2823 emit_d8(cbuf, 4*4); 2824 %} 2825 2826 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2827 // MOV $tmp,$src.lo 2828 emit_opcode(cbuf, 0x8B); 2829 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2830 // OR $tmp,$src.hi 2831 emit_opcode(cbuf, 0x0B); 2832 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg)); 2833 %} 2834 2835 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2836 // CMP $src1.lo,$src2.lo 2837 emit_opcode( cbuf, 0x3B ); 2838 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2839 // JNE,s skip 2840 emit_cc(cbuf, 0x70, 0x5); 2841 emit_d8(cbuf,2); 2842 // CMP $src1.hi,$src2.hi 2843 emit_opcode( cbuf, 0x3B ); 2844 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) ); 2845 %} 2846 2847 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2848 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2849 emit_opcode( cbuf, 0x3B ); 2850 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2851 // MOV $tmp,$src1.hi 2852 emit_opcode( cbuf, 0x8B ); 2853 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) ); 2854 // SBB $tmp,$src2.hi\t! 
Compute flags for long compare 2855 emit_opcode( cbuf, 0x1B ); 2856 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) ); 2857 %} 2858 2859 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2860 // XOR $tmp,$tmp 2861 emit_opcode(cbuf,0x33); // XOR 2862 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2863 // CMP $tmp,$src.lo 2864 emit_opcode( cbuf, 0x3B ); 2865 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2866 // SBB $tmp,$src.hi 2867 emit_opcode( cbuf, 0x1B ); 2868 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) ); 2869 %} 2870 2871 // Sniff, sniff... smells like Gnu Superoptimizer 2872 enc_class neg_long( eRegL dst ) %{ 2873 emit_opcode(cbuf,0xF7); // NEG hi 2874 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg)); 2875 emit_opcode(cbuf,0xF7); // NEG lo 2876 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2877 emit_opcode(cbuf,0x83); // SBB hi,0 2878 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg)); 2879 emit_d8 (cbuf,0 ); 2880 %} 2881 2882 enc_class enc_pop_rdx() %{ 2883 emit_opcode(cbuf,0x5A); 2884 %} 2885 2886 enc_class enc_rethrow() %{ 2887 MacroAssembler _masm(&cbuf); 2888 cbuf.set_insts_mark(); 2889 emit_opcode(cbuf, 0xE9); // jmp entry 2890 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2891 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2892 __ post_call_nop(); 2893 %} 2894 2895 2896 // Convert a double to an int. Java semantics require we do complex 2897 // manglelations in the corner cases. So we set the rounding mode to 2898 // 'zero', store the darned double down as an int, and reset the 2899 // rounding mode to 'nearest'. The hardware throws an exception which 2900 // patches up the correct value directly to the stack. 2901 enc_class DPR2I_encoding( regDPR src ) %{ 2902 // Flip to round-to-zero mode. We attempted to allow invalid-op 2903 // exceptions here, so that a NAN or other corner-case value will 2904 // thrown an exception (but normal values get converted at full speed). 
2905 // However, I2C adapters and other float-stack manglers leave pending 2906 // invalid-op exceptions hanging. We would have to clear them before 2907 // enabling them and that is more expensive than just testing for the 2908 // invalid value Intel stores down in the corner cases. 2909 emit_opcode(cbuf,0xD9); // FLDCW trunc 2910 emit_opcode(cbuf,0x2D); 2911 emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()); 2912 // Allocate a word 2913 emit_opcode(cbuf,0x83); // SUB ESP,4 2914 emit_opcode(cbuf,0xEC); 2915 emit_d8(cbuf,0x04); 2916 // Encoding assumes a double has been pushed into FPR0. 2917 // Store down the double as an int, popping the FPU stack 2918 emit_opcode(cbuf,0xDB); // FISTP [ESP] 2919 emit_opcode(cbuf,0x1C); 2920 emit_d8(cbuf,0x24); 2921 // Restore the rounding mode; mask the exception 2922 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2923 emit_opcode(cbuf,0x2D); 2924 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2925 ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24() 2926 : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std()); 2927 2928 // Load the converted int; adjust CPU stack 2929 emit_opcode(cbuf,0x58); // POP EAX 2930 emit_opcode(cbuf,0x3D); // CMP EAX,imm 2931 emit_d32 (cbuf,0x80000000); // 0x80000000 2932 emit_opcode(cbuf,0x75); // JNE around_slow_call 2933 emit_d8 (cbuf,0x07); // Size of slow_call 2934 // Push src onto stack slow-path 2935 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2936 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2937 // CALL directly to the runtime 2938 MacroAssembler _masm(&cbuf); 2939 cbuf.set_insts_mark(); 2940 emit_opcode(cbuf,0xE8); // Call into runtime 2941 emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2942 __ post_call_nop(); 2943 // Carry on here... 
2944 %} 2945 2946 enc_class DPR2L_encoding( regDPR src ) %{ 2947 emit_opcode(cbuf,0xD9); // FLDCW trunc 2948 emit_opcode(cbuf,0x2D); 2949 emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()); 2950 // Allocate a word 2951 emit_opcode(cbuf,0x83); // SUB ESP,8 2952 emit_opcode(cbuf,0xEC); 2953 emit_d8(cbuf,0x08); 2954 // Encoding assumes a double has been pushed into FPR0. 2955 // Store down the double as a long, popping the FPU stack 2956 emit_opcode(cbuf,0xDF); // FISTP [ESP] 2957 emit_opcode(cbuf,0x3C); 2958 emit_d8(cbuf,0x24); 2959 // Restore the rounding mode; mask the exception 2960 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2961 emit_opcode(cbuf,0x2D); 2962 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2963 ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24() 2964 : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std()); 2965 2966 // Load the converted int; adjust CPU stack 2967 emit_opcode(cbuf,0x58); // POP EAX 2968 emit_opcode(cbuf,0x5A); // POP EDX 2969 emit_opcode(cbuf,0x81); // CMP EDX,imm 2970 emit_d8 (cbuf,0xFA); // rdx 2971 emit_d32 (cbuf,0x80000000); // 0x80000000 2972 emit_opcode(cbuf,0x75); // JNE around_slow_call 2973 emit_d8 (cbuf,0x07+4); // Size of slow_call 2974 emit_opcode(cbuf,0x85); // TEST EAX,EAX 2975 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 2976 emit_opcode(cbuf,0x75); // JNE around_slow_call 2977 emit_d8 (cbuf,0x07); // Size of slow_call 2978 // Push src onto stack slow-path 2979 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2980 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2981 // CALL directly to the runtime 2982 MacroAssembler _masm(&cbuf); 2983 cbuf.set_insts_mark(); 2984 emit_opcode(cbuf,0xE8); // Call into runtime 2985 emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2986 __ post_call_nop(); 2987 // Carry on here... 
2988 %} 2989 2990 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 2991 // Operand was loaded from memory into fp ST (stack top) 2992 // FMUL ST,$src /* D8 C8+i */ 2993 emit_opcode(cbuf, 0xD8); 2994 emit_opcode(cbuf, 0xC8 + $src1$$reg); 2995 %} 2996 2997 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 2998 // FADDP ST,src2 /* D8 C0+i */ 2999 emit_opcode(cbuf, 0xD8); 3000 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3001 //could use FADDP src2,fpST /* DE C0+i */ 3002 %} 3003 3004 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 3005 // FADDP src2,ST /* DE C0+i */ 3006 emit_opcode(cbuf, 0xDE); 3007 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3008 %} 3009 3010 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 3011 // Operand has been loaded into fp ST (stack top) 3012 // FSUB ST,$src1 3013 emit_opcode(cbuf, 0xD8); 3014 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3015 3016 // FDIV 3017 emit_opcode(cbuf, 0xD8); 3018 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3019 %} 3020 3021 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3022 // Operand was loaded from memory into fp ST (stack top) 3023 // FADD ST,$src /* D8 C0+i */ 3024 emit_opcode(cbuf, 0xD8); 3025 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3026 3027 // FMUL ST,src2 /* D8 C*+i */ 3028 emit_opcode(cbuf, 0xD8); 3029 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3030 %} 3031 3032 3033 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3034 // Operand was loaded from memory into fp ST (stack top) 3035 // FADD ST,$src /* D8 C0+i */ 3036 emit_opcode(cbuf, 0xD8); 3037 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3038 3039 // FMULP src2,ST /* DE C8+i */ 3040 emit_opcode(cbuf, 0xDE); 3041 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3042 %} 3043 3044 // Atomically load the volatile long 3045 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3046 emit_opcode(cbuf,0xDF); 3047 int rm_byte_opcode = 0x05; 3048 int base = $mem$$base; 3049 int index = $mem$$index; 3050 int scale = $mem$$scale; 3051 int displace = $mem$$disp; 3052 relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); // disp-as-oop when working with static globals 3053 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3054 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3055 %} 3056 3057 // Volatile Store Long. Must be atomic, so move it into 3058 // the FP TOS and then do a 64-bit FIST. Has to probe the 3059 // target address before the store (for null-ptr checks) 3060 // so the memory operand is used twice in the encoding. 3061 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3062 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3063 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3064 emit_opcode(cbuf,0xDF); 3065 int rm_byte_opcode = 0x07; 3066 int base = $mem$$base; 3067 int index = $mem$$index; 3068 int scale = $mem$$scale; 3069 int displace = $mem$$disp; 3070 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3071 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3072 %} 3073 3074 %} 3075 3076 3077 //----------FRAME-------------------------------------------------------------- 3078 // Definition of frame structure and management information. 3079 // 3080 // S T A C K L A Y O U T Allocators stack-slot number 3081 // | (to get allocators register number 3082 // G Owned by | | v add OptoReg::stack0()) 3083 // r CALLER | | 3084 // o | +--------+ pad to even-align allocators stack-slot 3085 // w V | pad0 | numbers; owned by CALLER 3086 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned 3087 // h ^ | in | 5 3088 // | | args | 4 Holes in incoming args owned by SELF 3089 // | | | | 3 3090 // | | +--------+ 3091 // V | | old out| Empty on Intel, window on Sparc 3092 // | old |preserve| Must be even aligned. 3093 // | SP-+--------+----> Matcher::_old_SP, even aligned 3094 // | | in | 3 area for Intel ret address 3095 // Owned by |preserve| Empty on Sparc. 
//       SELF    +--------+
//        |      |  pad2  |  2   pad to align old SP
//        |      +--------+  1
//        |      |  locks |  0
//        |      +--------+----> OptoReg::stack0(), even aligned
//        |      |  pad1  | 11   pad to align new SP
//        |      +--------+
//        |      |        | 10
//        |      | spills |  9   spills
//        V      |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//             ^      |  out  |  7
//             |      |  args |  6   Holes in outgoing args owned by CALLEE
//  Owned by  +--------+
//    CALLEE  | new out|  6   Empty on Intel, window on Sparc
//            | new    |preserve|   Must be even-aligned.
//            | SP-+--------+----> Matcher::_new_SP, even aligned
//                 |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of C & interpreter return values
  // NOTE: Java returns (return_value below) use XMM0 from SSE1 on for
  // floats, while C returns keep floats on the x87 stack until SSE2.
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immU8() %{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus-one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: any value that fits in 32 signed bits
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K1()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K1));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K2()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K2));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K3()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K3));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K4()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K4));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K5()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K5));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// (definition continues past the end of this section)
operand kReg_K6()
%{
constraint(ALLOC_IN_RC(vectmask_reg_K6)); 3643 match(RegVectMask); 3644 format %{%} 3645 interface(REG_INTER); 3646 %} 3647 3648 // Special Registers 3649 operand kReg_K7() 3650 %{ 3651 constraint(ALLOC_IN_RC(vectmask_reg_K7)); 3652 match(RegVectMask); 3653 format %{%} 3654 interface(REG_INTER); 3655 %} 3656 3657 // Register Operands 3658 // Integer Register 3659 operand rRegI() %{ 3660 constraint(ALLOC_IN_RC(int_reg)); 3661 match(RegI); 3662 match(xRegI); 3663 match(eAXRegI); 3664 match(eBXRegI); 3665 match(eCXRegI); 3666 match(eDXRegI); 3667 match(eDIRegI); 3668 match(eSIRegI); 3669 3670 format %{ %} 3671 interface(REG_INTER); 3672 %} 3673 3674 // Subset of Integer Register 3675 operand xRegI(rRegI reg) %{ 3676 constraint(ALLOC_IN_RC(int_x_reg)); 3677 match(reg); 3678 match(eAXRegI); 3679 match(eBXRegI); 3680 match(eCXRegI); 3681 match(eDXRegI); 3682 3683 format %{ %} 3684 interface(REG_INTER); 3685 %} 3686 3687 // Special Registers 3688 operand eAXRegI(xRegI reg) %{ 3689 constraint(ALLOC_IN_RC(eax_reg)); 3690 match(reg); 3691 match(rRegI); 3692 3693 format %{ "EAX" %} 3694 interface(REG_INTER); 3695 %} 3696 3697 // Special Registers 3698 operand eBXRegI(xRegI reg) %{ 3699 constraint(ALLOC_IN_RC(ebx_reg)); 3700 match(reg); 3701 match(rRegI); 3702 3703 format %{ "EBX" %} 3704 interface(REG_INTER); 3705 %} 3706 3707 operand eCXRegI(xRegI reg) %{ 3708 constraint(ALLOC_IN_RC(ecx_reg)); 3709 match(reg); 3710 match(rRegI); 3711 3712 format %{ "ECX" %} 3713 interface(REG_INTER); 3714 %} 3715 3716 operand eDXRegI(xRegI reg) %{ 3717 constraint(ALLOC_IN_RC(edx_reg)); 3718 match(reg); 3719 match(rRegI); 3720 3721 format %{ "EDX" %} 3722 interface(REG_INTER); 3723 %} 3724 3725 operand eDIRegI(xRegI reg) %{ 3726 constraint(ALLOC_IN_RC(edi_reg)); 3727 match(reg); 3728 match(rRegI); 3729 3730 format %{ "EDI" %} 3731 interface(REG_INTER); 3732 %} 3733 3734 operand naxRegI() %{ 3735 constraint(ALLOC_IN_RC(nax_reg)); 3736 match(RegI); 3737 match(eCXRegI); 3738 match(eDXRegI); 3739 
match(eSIRegI); 3740 match(eDIRegI); 3741 3742 format %{ %} 3743 interface(REG_INTER); 3744 %} 3745 3746 operand nadxRegI() %{ 3747 constraint(ALLOC_IN_RC(nadx_reg)); 3748 match(RegI); 3749 match(eBXRegI); 3750 match(eCXRegI); 3751 match(eSIRegI); 3752 match(eDIRegI); 3753 3754 format %{ %} 3755 interface(REG_INTER); 3756 %} 3757 3758 operand ncxRegI() %{ 3759 constraint(ALLOC_IN_RC(ncx_reg)); 3760 match(RegI); 3761 match(eAXRegI); 3762 match(eDXRegI); 3763 match(eSIRegI); 3764 match(eDIRegI); 3765 3766 format %{ %} 3767 interface(REG_INTER); 3768 %} 3769 3770 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3771 // // 3772 operand eSIRegI(xRegI reg) %{ 3773 constraint(ALLOC_IN_RC(esi_reg)); 3774 match(reg); 3775 match(rRegI); 3776 3777 format %{ "ESI" %} 3778 interface(REG_INTER); 3779 %} 3780 3781 // Pointer Register 3782 operand anyRegP() %{ 3783 constraint(ALLOC_IN_RC(any_reg)); 3784 match(RegP); 3785 match(eAXRegP); 3786 match(eBXRegP); 3787 match(eCXRegP); 3788 match(eDIRegP); 3789 match(eRegP); 3790 3791 format %{ %} 3792 interface(REG_INTER); 3793 %} 3794 3795 operand eRegP() %{ 3796 constraint(ALLOC_IN_RC(int_reg)); 3797 match(RegP); 3798 match(eAXRegP); 3799 match(eBXRegP); 3800 match(eCXRegP); 3801 match(eDIRegP); 3802 3803 format %{ %} 3804 interface(REG_INTER); 3805 %} 3806 3807 operand rRegP() %{ 3808 constraint(ALLOC_IN_RC(int_reg)); 3809 match(RegP); 3810 match(eAXRegP); 3811 match(eBXRegP); 3812 match(eCXRegP); 3813 match(eDIRegP); 3814 3815 format %{ %} 3816 interface(REG_INTER); 3817 %} 3818 3819 // On windows95, EBP is not safe to use for implicit null tests. 
// Pointer register excluding EBP (see note above about implicit null tests)
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX and EBX
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Long value lives in a register pair
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

operand eBDPRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebpd_reg));
  match(reg);

  format %{ "EBP:EDI" %}
  interface(REG_INTER);
%}
// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand rFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// Never matched directly (predicate(false)); selected only by explicit
// operand substitution in instructions that want the CF-based encoding.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

operand legRegF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand vlRegF() %{
  constraint(ALLOC_IN_RC(float_reg_vl));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand legRegD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

operand vlRegD() %{
  constraint(ALLOC_IN_RC(double_reg_vl));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);     // 0x4 in the index field encodes "no index register"
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long. If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address. Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.
// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);     // no index register
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable size (x86)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR
dst) %{ 4824 instruction_count(2); 4825 dst : S3(read); 4826 DECODE : S0(2); // any 2 decoders 4827 FPU : S3; 4828 %} 4829 4830 // Float reg-reg operation 4831 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 4832 instruction_count(2); 4833 dst : S4(write); 4834 src : S3(read); 4835 DECODE : S0(2); // any 2 decoders 4836 FPU : S3; 4837 %} 4838 4839 // Float reg-reg operation 4840 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 4841 instruction_count(3); 4842 dst : S4(write); 4843 src1 : S3(read); 4844 src2 : S3(read); 4845 DECODE : S0(3); // any 3 decoders 4846 FPU : S3(2); 4847 %} 4848 4849 // Float reg-reg operation 4850 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 4851 instruction_count(4); 4852 dst : S4(write); 4853 src1 : S3(read); 4854 src2 : S3(read); 4855 src3 : S3(read); 4856 DECODE : S0(4); // any 3 decoders 4857 FPU : S3(2); 4858 %} 4859 4860 // Float reg-reg operation 4861 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 4862 instruction_count(4); 4863 dst : S4(write); 4864 src1 : S3(read); 4865 src2 : S3(read); 4866 src3 : S3(read); 4867 DECODE : S1(3); // any 3 decoders 4868 D0 : S0; // Big decoder only 4869 FPU : S3(2); 4870 MEM : S3; 4871 %} 4872 4873 // Float reg-mem operation 4874 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 4875 instruction_count(2); 4876 dst : S5(write); 4877 mem : S3(read); 4878 D0 : S0; // big decoder only 4879 DECODE : S1; // any decoder for FPU POP 4880 FPU : S4; 4881 MEM : S3; // any mem 4882 %} 4883 4884 // Float reg-mem operation 4885 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 4886 instruction_count(3); 4887 dst : S5(write); 4888 src1 : S3(read); 4889 mem : S3(read); 4890 D0 : S0; // big decoder only 4891 DECODE : S1(2); // any decoder for FPU POP 4892 FPU : S4; 4893 MEM : S3; // any mem 4894 %} 4895 4896 // Float mem-reg operation 4897 pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 4898 
instruction_count(2); 4899 src : S5(read); 4900 mem : S3(read); 4901 DECODE : S0; // any decoder for FPU PUSH 4902 D0 : S1; // big decoder only 4903 FPU : S4; 4904 MEM : S3; // any mem 4905 %} 4906 4907 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4908 instruction_count(3); 4909 src1 : S3(read); 4910 src2 : S3(read); 4911 mem : S3(read); 4912 DECODE : S0(2); // any decoder for FPU PUSH 4913 D0 : S1; // big decoder only 4914 FPU : S4; 4915 MEM : S3; // any mem 4916 %} 4917 4918 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4919 instruction_count(3); 4920 src1 : S3(read); 4921 src2 : S3(read); 4922 mem : S4(read); 4923 DECODE : S0; // any decoder for FPU PUSH 4924 D0 : S0(2); // big decoder only 4925 FPU : S4; 4926 MEM : S3(2); // any mem 4927 %} 4928 4929 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4930 instruction_count(2); 4931 src1 : S3(read); 4932 dst : S4(read); 4933 D0 : S0(2); // big decoder only 4934 MEM : S3(2); // any mem 4935 %} 4936 4937 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4938 instruction_count(3); 4939 src1 : S3(read); 4940 src2 : S3(read); 4941 dst : S4(read); 4942 D0 : S0(3); // big decoder only 4943 FPU : S4; 4944 MEM : S3(3); // any mem 4945 %} 4946 4947 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4948 instruction_count(3); 4949 src1 : S4(read); 4950 mem : S4(read); 4951 DECODE : S0; // any decoder for FPU PUSH 4952 D0 : S0(2); // big decoder only 4953 FPU : S4; 4954 MEM : S3(2); // any mem 4955 %} 4956 4957 // Float load constant 4958 pipe_class fpu_reg_con(regDPR dst) %{ 4959 instruction_count(2); 4960 dst : S5(write); 4961 D0 : S0; // big decoder only for the load 4962 DECODE : S1; // any decoder for FPU POP 4963 FPU : S4; 4964 MEM : S3; // any mem 4965 %} 4966 4967 // Float load constant 4968 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 4969 instruction_count(3); 4970 dst : S5(write); 4971 src : S3(read); 4972 D0 : S0; // big decoder only for 
the load 4973 DECODE : S1(2); // any decoder for FPU POP 4974 FPU : S4; 4975 MEM : S3; // any mem 4976 %} 4977 4978 // UnConditional branch 4979 pipe_class pipe_jmp( label labl ) %{ 4980 single_instruction; 4981 BR : S3; 4982 %} 4983 4984 // Conditional branch 4985 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 4986 single_instruction; 4987 cr : S1(read); 4988 BR : S3; 4989 %} 4990 4991 // Allocation idiom 4992 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 4993 instruction_count(1); force_serialization; 4994 fixed_latency(6); 4995 heap_ptr : S3(read); 4996 DECODE : S0(3); 4997 D0 : S2; 4998 MEM : S3; 4999 ALU : S3(2); 5000 dst : S5(write); 5001 BR : S5; 5002 %} 5003 5004 // Generic big/slow expanded idiom 5005 pipe_class pipe_slow( ) %{ 5006 instruction_count(10); multiple_bundles; force_serialization; 5007 fixed_latency(100); 5008 D0 : S0(2); 5009 MEM : S3(2); 5010 %} 5011 5012 // The real do-nothing guy 5013 pipe_class empty( ) %{ 5014 instruction_count(0); 5015 %} 5016 5017 // Define the class for the Nop node 5018 define %{ 5019 MachNop = empty; 5020 %} 5021 5022 %} 5023 5024 //----------INSTRUCTIONS------------------------------------------------------- 5025 // 5026 // match -- States which machine-independent subtree may be replaced 5027 // by this instruction. 5028 // ins_cost -- The estimated cost of this instruction is used by instruction 5029 // selection to identify a minimum cost tree of machine 5030 // instructions that matches a tree of machine-independent 5031 // instructions. 5032 // format -- A string providing the disassembly for this instruction. 5033 // The value of an instruction's operand may be inserted 5034 // by referring to it with a '$' prefix. 5035 // opcode -- Three instruction opcodes may be provided. These are referred 5036 // to within an encode class as $primary, $secondary, and $tertiary 5037 // respectively. 
The primary opcode is commonly used to 5038 // indicate the type of machine instruction, while secondary 5039 // and tertiary are often used for prefix options or addressing 5040 // modes. 5041 // ins_encode -- A list of encode classes with parameters. The encode class 5042 // name must have been defined in an 'enc_class' specification 5043 // in the encode section of the architecture description. 5044 5045 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 5046 // Load Float 5047 instruct MoveF2LEG(legRegF dst, regF src) %{ 5048 match(Set dst src); 5049 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} 5050 ins_encode %{ 5051 ShouldNotReachHere(); 5052 %} 5053 ins_pipe( fpu_reg_reg ); 5054 %} 5055 5056 // Load Float 5057 instruct MoveLEG2F(regF dst, legRegF src) %{ 5058 match(Set dst src); 5059 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} 5060 ins_encode %{ 5061 ShouldNotReachHere(); 5062 %} 5063 ins_pipe( fpu_reg_reg ); 5064 %} 5065 5066 // Load Float 5067 instruct MoveF2VL(vlRegF dst, regF src) %{ 5068 match(Set dst src); 5069 format %{ "movss $dst,$src\t! load float (4 bytes)" %} 5070 ins_encode %{ 5071 ShouldNotReachHere(); 5072 %} 5073 ins_pipe( fpu_reg_reg ); 5074 %} 5075 5076 // Load Float 5077 instruct MoveVL2F(regF dst, vlRegF src) %{ 5078 match(Set dst src); 5079 format %{ "movss $dst,$src\t! 
load float (4 bytes)" %} 5080 ins_encode %{ 5081 ShouldNotReachHere(); 5082 %} 5083 ins_pipe( fpu_reg_reg ); 5084 %} 5085 5086 5087 5088 // Load Double 5089 instruct MoveD2LEG(legRegD dst, regD src) %{ 5090 match(Set dst src); 5091 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} 5092 ins_encode %{ 5093 ShouldNotReachHere(); 5094 %} 5095 ins_pipe( fpu_reg_reg ); 5096 %} 5097 5098 // Load Double 5099 instruct MoveLEG2D(regD dst, legRegD src) %{ 5100 match(Set dst src); 5101 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} 5102 ins_encode %{ 5103 ShouldNotReachHere(); 5104 %} 5105 ins_pipe( fpu_reg_reg ); 5106 %} 5107 5108 // Load Double 5109 instruct MoveD2VL(vlRegD dst, regD src) %{ 5110 match(Set dst src); 5111 format %{ "movsd $dst,$src\t! load double (8 bytes)" %} 5112 ins_encode %{ 5113 ShouldNotReachHere(); 5114 %} 5115 ins_pipe( fpu_reg_reg ); 5116 %} 5117 5118 // Load Double 5119 instruct MoveVL2D(regD dst, vlRegD src) %{ 5120 match(Set dst src); 5121 format %{ "movsd $dst,$src\t! 
load double (8 bytes)" %} 5122 ins_encode %{ 5123 ShouldNotReachHere(); 5124 %} 5125 ins_pipe( fpu_reg_reg ); 5126 %} 5127 5128 //----------BSWAP-Instruction-------------------------------------------------- 5129 instruct bytes_reverse_int(rRegI dst) %{ 5130 match(Set dst (ReverseBytesI dst)); 5131 5132 format %{ "BSWAP $dst" %} 5133 opcode(0x0F, 0xC8); 5134 ins_encode( OpcP, OpcSReg(dst) ); 5135 ins_pipe( ialu_reg ); 5136 %} 5137 5138 instruct bytes_reverse_long(eRegL dst) %{ 5139 match(Set dst (ReverseBytesL dst)); 5140 5141 format %{ "BSWAP $dst.lo\n\t" 5142 "BSWAP $dst.hi\n\t" 5143 "XCHG $dst.lo $dst.hi" %} 5144 5145 ins_cost(125); 5146 ins_encode( bswap_long_bytes(dst) ); 5147 ins_pipe( ialu_reg_reg); 5148 %} 5149 5150 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5151 match(Set dst (ReverseBytesUS dst)); 5152 effect(KILL cr); 5153 5154 format %{ "BSWAP $dst\n\t" 5155 "SHR $dst,16\n\t" %} 5156 ins_encode %{ 5157 __ bswapl($dst$$Register); 5158 __ shrl($dst$$Register, 16); 5159 %} 5160 ins_pipe( ialu_reg ); 5161 %} 5162 5163 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5164 match(Set dst (ReverseBytesS dst)); 5165 effect(KILL cr); 5166 5167 format %{ "BSWAP $dst\n\t" 5168 "SAR $dst,16\n\t" %} 5169 ins_encode %{ 5170 __ bswapl($dst$$Register); 5171 __ sarl($dst$$Register, 16); 5172 %} 5173 ins_pipe( ialu_reg ); 5174 %} 5175 5176 5177 //---------- Zeros Count Instructions ------------------------------------------ 5178 5179 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5180 predicate(UseCountLeadingZerosInstruction); 5181 match(Set dst (CountLeadingZerosI src)); 5182 effect(KILL cr); 5183 5184 format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %} 5185 ins_encode %{ 5186 __ lzcntl($dst$$Register, $src$$Register); 5187 %} 5188 ins_pipe(ialu_reg); 5189 %} 5190 5191 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5192 predicate(!UseCountLeadingZerosInstruction); 5193 match(Set dst 
(CountLeadingZerosI src)); 5194 effect(KILL cr); 5195 5196 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5197 "JNZ skip\n\t" 5198 "MOV $dst, -1\n" 5199 "skip:\n\t" 5200 "NEG $dst\n\t" 5201 "ADD $dst, 31" %} 5202 ins_encode %{ 5203 Register Rdst = $dst$$Register; 5204 Register Rsrc = $src$$Register; 5205 Label skip; 5206 __ bsrl(Rdst, Rsrc); 5207 __ jccb(Assembler::notZero, skip); 5208 __ movl(Rdst, -1); 5209 __ bind(skip); 5210 __ negl(Rdst); 5211 __ addl(Rdst, BitsPerInt - 1); 5212 %} 5213 ins_pipe(ialu_reg); 5214 %} 5215 5216 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5217 predicate(UseCountLeadingZerosInstruction); 5218 match(Set dst (CountLeadingZerosL src)); 5219 effect(TEMP dst, KILL cr); 5220 5221 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5222 "JNC done\n\t" 5223 "LZCNT $dst, $src.lo\n\t" 5224 "ADD $dst, 32\n" 5225 "done:" %} 5226 ins_encode %{ 5227 Register Rdst = $dst$$Register; 5228 Register Rsrc = $src$$Register; 5229 Label done; 5230 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5231 __ jccb(Assembler::carryClear, done); 5232 __ lzcntl(Rdst, Rsrc); 5233 __ addl(Rdst, BitsPerInt); 5234 __ bind(done); 5235 %} 5236 ins_pipe(ialu_reg); 5237 %} 5238 5239 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5240 predicate(!UseCountLeadingZerosInstruction); 5241 match(Set dst (CountLeadingZerosL src)); 5242 effect(TEMP dst, KILL cr); 5243 5244 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5245 "JZ msw_is_zero\n\t" 5246 "ADD $dst, 32\n\t" 5247 "JMP not_zero\n" 5248 "msw_is_zero:\n\t" 5249 "BSR $dst, $src.lo\n\t" 5250 "JNZ not_zero\n\t" 5251 "MOV $dst, -1\n" 5252 "not_zero:\n\t" 5253 "NEG $dst\n\t" 5254 "ADD $dst, 63\n" %} 5255 ins_encode %{ 5256 Register Rdst = $dst$$Register; 5257 Register Rsrc = $src$$Register; 5258 Label msw_is_zero; 5259 Label not_zero; 5260 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5261 __ jccb(Assembler::zero, msw_is_zero); 5262 __ addl(Rdst, BitsPerInt); 
5263 __ jmpb(not_zero); 5264 __ bind(msw_is_zero); 5265 __ bsrl(Rdst, Rsrc); 5266 __ jccb(Assembler::notZero, not_zero); 5267 __ movl(Rdst, -1); 5268 __ bind(not_zero); 5269 __ negl(Rdst); 5270 __ addl(Rdst, BitsPerLong - 1); 5271 %} 5272 ins_pipe(ialu_reg); 5273 %} 5274 5275 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5276 predicate(UseCountTrailingZerosInstruction); 5277 match(Set dst (CountTrailingZerosI src)); 5278 effect(KILL cr); 5279 5280 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5281 ins_encode %{ 5282 __ tzcntl($dst$$Register, $src$$Register); 5283 %} 5284 ins_pipe(ialu_reg); 5285 %} 5286 5287 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5288 predicate(!UseCountTrailingZerosInstruction); 5289 match(Set dst (CountTrailingZerosI src)); 5290 effect(KILL cr); 5291 5292 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5293 "JNZ done\n\t" 5294 "MOV $dst, 32\n" 5295 "done:" %} 5296 ins_encode %{ 5297 Register Rdst = $dst$$Register; 5298 Label done; 5299 __ bsfl(Rdst, $src$$Register); 5300 __ jccb(Assembler::notZero, done); 5301 __ movl(Rdst, BitsPerInt); 5302 __ bind(done); 5303 %} 5304 ins_pipe(ialu_reg); 5305 %} 5306 5307 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5308 predicate(UseCountTrailingZerosInstruction); 5309 match(Set dst (CountTrailingZerosL src)); 5310 effect(TEMP dst, KILL cr); 5311 5312 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5313 "JNC done\n\t" 5314 "TZCNT $dst, $src.hi\n\t" 5315 "ADD $dst, 32\n" 5316 "done:" %} 5317 ins_encode %{ 5318 Register Rdst = $dst$$Register; 5319 Register Rsrc = $src$$Register; 5320 Label done; 5321 __ tzcntl(Rdst, Rsrc); 5322 __ jccb(Assembler::carryClear, done); 5323 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5324 __ addl(Rdst, BitsPerInt); 5325 __ bind(done); 5326 %} 5327 ins_pipe(ialu_reg); 5328 %} 5329 5330 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5331 
predicate(!UseCountTrailingZerosInstruction); 5332 match(Set dst (CountTrailingZerosL src)); 5333 effect(TEMP dst, KILL cr); 5334 5335 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5336 "JNZ done\n\t" 5337 "BSF $dst, $src.hi\n\t" 5338 "JNZ msw_not_zero\n\t" 5339 "MOV $dst, 32\n" 5340 "msw_not_zero:\n\t" 5341 "ADD $dst, 32\n" 5342 "done:" %} 5343 ins_encode %{ 5344 Register Rdst = $dst$$Register; 5345 Register Rsrc = $src$$Register; 5346 Label msw_not_zero; 5347 Label done; 5348 __ bsfl(Rdst, Rsrc); 5349 __ jccb(Assembler::notZero, done); 5350 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5351 __ jccb(Assembler::notZero, msw_not_zero); 5352 __ movl(Rdst, BitsPerInt); 5353 __ bind(msw_not_zero); 5354 __ addl(Rdst, BitsPerInt); 5355 __ bind(done); 5356 %} 5357 ins_pipe(ialu_reg); 5358 %} 5359 5360 5361 //---------- Population Count Instructions ------------------------------------- 5362 5363 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5364 predicate(UsePopCountInstruction); 5365 match(Set dst (PopCountI src)); 5366 effect(KILL cr); 5367 5368 format %{ "POPCNT $dst, $src" %} 5369 ins_encode %{ 5370 __ popcntl($dst$$Register, $src$$Register); 5371 %} 5372 ins_pipe(ialu_reg); 5373 %} 5374 5375 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5376 predicate(UsePopCountInstruction); 5377 match(Set dst (PopCountI (LoadI mem))); 5378 effect(KILL cr); 5379 5380 format %{ "POPCNT $dst, $mem" %} 5381 ins_encode %{ 5382 __ popcntl($dst$$Register, $mem$$Address); 5383 %} 5384 ins_pipe(ialu_reg); 5385 %} 5386 5387 // Note: Long.bitCount(long) returns an int. 
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  // Pop-count each 32-bit half and sum the results.
  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));   // zero-extend: high word is always 0
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant & right_n_bits(8));   // only low 8 mask bits matter after zero-extend
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  // Masking with 0xFF reduces the load to a single byte zero-extend.
  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant & right_n_bits(16));   // only low 16 mask bits matter after zero-extend
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);   // replicate the sign bit into the high word
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
// x87 path: a single 64-bit FILD/FISTP pair performs one 64-bit memory
// access each way, which is what provides the required atomicity.
// Result lands in a stack slot, not a register pair.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 path: one 64-bit MOVSD load is atomic; bounce the value through
// an XMM temporary into the destination stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    // $dst is a stack slot: store relative to ESP at the slot's displacement.
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 path producing the result directly in an integer register pair:
// atomic MOVSD into an XMM temp, then split into $dst.lo / $dst.hi
// (MOVD low half, shift right 32, MOVD high half).
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range (array length word); plain 32-bit MOV (8B /r).
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 path, UseSSE<=1): push from memory, pop to virtual reg.
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Variant selected when UseXmmLoadAndClearUpper is off.
// NOTE(review): format lists MOVLPD (upper half of $dst left intact);
// movdbl() presumably selects MOVLPD under this flag — confirm in
// MacroAssembler::movdbl.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 path, UseSSE==0)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address: one instruct per matched addressing-mode
// operand (8-bit offset, 32-bit offset, index+offset, scaled index,
// scaled index+offset).  All emit LEA (8D /r).
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero: XOR reg,reg is shorter than MOV imm32.
// XOR writes EFLAGS, hence the KILL cr effect.
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33); /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8); /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Long constant: two 32-bit immediate moves, one per register half.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Long zero: XOR both halves (writes EFLAGS, hence KILL cr).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// x87 float constant loaded from the constant table.
// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// x87 float 0.0 via FLDZ — no constant-table access needed.
// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// x87 float 1.0 via FLD1 — no constant-table access needed.
// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
6080 instruct loadConF(regF dst, immF con) %{ 6081 match(Set dst con); 6082 ins_cost(125); 6083 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 6084 ins_encode %{ 6085 __ movflt($dst$$XMMRegister, $constantaddress($con)); 6086 %} 6087 ins_pipe(pipe_slow); 6088 %} 6089 6090 // The instruction usage is guarded by predicate in operand immF0(). 6091 instruct loadConF0(regF dst, immF0 src) %{ 6092 match(Set dst src); 6093 ins_cost(100); 6094 format %{ "XORPS $dst,$dst\t# float 0.0" %} 6095 ins_encode %{ 6096 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 6097 %} 6098 ins_pipe(pipe_slow); 6099 %} 6100 6101 // The instruction usage is guarded by predicate in operand immDPR(). 6102 instruct loadConDPR(regDPR dst, immDPR con) %{ 6103 match(Set dst con); 6104 ins_cost(125); 6105 6106 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 6107 "FSTP $dst" %} 6108 ins_encode %{ 6109 __ fld_d($constantaddress($con)); 6110 __ fstp_d($dst$$reg); 6111 %} 6112 ins_pipe(fpu_reg_con); 6113 %} 6114 6115 // The instruction usage is guarded by predicate in operand immDPR0(). 6116 instruct loadConDPR0(regDPR dst, immDPR0 con) %{ 6117 match(Set dst con); 6118 ins_cost(125); 6119 6120 format %{ "FLDZ ST\n\t" 6121 "FSTP $dst" %} 6122 ins_encode %{ 6123 __ fldz(); 6124 __ fstp_d($dst$$reg); 6125 %} 6126 ins_pipe(fpu_reg_con); 6127 %} 6128 6129 // The instruction usage is guarded by predicate in operand immDPR1(). 6130 instruct loadConDPR1(regDPR dst, immDPR1 con) %{ 6131 match(Set dst con); 6132 ins_cost(125); 6133 6134 format %{ "FLD1 ST\n\t" 6135 "FSTP $dst" %} 6136 ins_encode %{ 6137 __ fld1(); 6138 __ fstp_d($dst$$reg); 6139 %} 6140 ins_pipe(fpu_reg_con); 6141 %} 6142 6143 // The instruction usage is guarded by predicate in operand immD(). 
// SSE2 double constant loaded from the constant table into an XMM register.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// SSE2 double 0.0: XORPD reg,reg — no memory access needed.
// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot (int)
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot (long): two 32-bit MOVs, one per register half.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot (pointer)
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot (float, x87): stack slots hold no oops, so the
// _no_oop memory encoding is used.
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot (double, x87)
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// Variant selected by the AllocatePrefetchInstr flag (and SSE level).

// Non-SSE and not PREFETCHW: emit nothing.
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte (88 /r)
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short: 89 /r with 0x66 operand-size prefix (OpcS emits
// the prefix before OpcP).
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long (non-atomic): two 32-bit MOVs, low half then high half.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer: only the low half is written.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
// The probe is a CMP of $mem against EAX (3B /r): it faults on a
// null/bad address but does not modify memory.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: probe, then atomic 64-bit MOVSD from the source stack
// slot through an XMM temp to memory.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant taking the source directly from an integer register pair:
// pack $src.lo/$src.hi into one XMM via MOVD+MOVD+PUNPCKLDQ, then one
// atomic 64-bit MOVSD store.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate (guarded by UseStoreImmI16; on some CPUs
// the prefixed imm16 form is slow, so the flag can disable this match).
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
// Pointer-immediate store (C7 /0 with 32-bit immediate).
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate (StoreCM node): same byte-store
// encoding as storeImmB, but matches the card-table marking node.
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87 path; src is constrained to the FP top-of-stack reg)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// Store Float (x87 path)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86; here the FST_S itself narrows the
// double source, so the explicit ConvD2F is folded into the store.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
// Float-immediate store: the float's bit pattern is stored as a plain
// 32-bit integer immediate (no FPU involvement).
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot: two 32-bit MOVs, one per register half.
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors.  On x86 most barriers are no-ops in the code
// stream (size(0)); only the full StoreLoad barrier emits an instruction.

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full StoreLoad barrier; emitted as a locked ADD to the stack
// (see MacroAssembler::membar), which writes EFLAGS — hence KILL cr.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elided StoreLoad barrier: a preceding instruction already provides
// the ordering (Matcher::post_store_load_barrier decides).
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// CastX2P is free: src and dst are both constrained to EAX, so no code
// is emitted.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move, emulated with a short branch around a MOV for CPUs
// without CMOV support.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned-compare variant of jmovI_reg.
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// True CMOVcc (0F 40+cc) for CPUs that support it.
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Carry-flag-only unsigned compare: expands to the plain unsigned form.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move with a memory source (folds the LoadI).
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
//       regardless of whether we are on a P6, so we
//       emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move via x87 FCMOVcc (DA opcode group); dst is
// constrained to the FP top-of-stack register.
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6901 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ 6902 predicate(UseSSE<=1); 6903 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6904 ins_cost(200); 6905 format %{ "Jn$cop skip\n\t" 6906 "MOV $dst,$src\t# double\n" 6907 "skip:" %} 6908 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6909 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); 6910 ins_pipe( pipe_cmovDPR_reg ); 6911 %} 6912 6913 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6914 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ 6915 predicate(UseSSE==0); 6916 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6917 ins_cost(200); 6918 format %{ "Jn$cop skip\n\t" 6919 "MOV $dst,$src\t# float\n" 6920 "skip:" %} 6921 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6922 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); 6923 ins_pipe( pipe_cmovDPR_reg ); 6924 %} 6925 6926 // No CMOVE with SSE/SSE2 6927 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 6928 predicate (UseSSE>=1); 6929 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6930 ins_cost(200); 6931 format %{ "Jn$cop skip\n\t" 6932 "MOVSS $dst,$src\t# float\n" 6933 "skip:" %} 6934 ins_encode %{ 6935 Label skip; 6936 // Invert sense of branch from sense of CMOV 6937 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6938 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6939 __ bind(skip); 6940 %} 6941 ins_pipe( pipe_slow ); 6942 %} 6943 6944 // No CMOVE with SSE/SSE2 6945 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 6946 predicate (UseSSE>=2); 6947 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6948 ins_cost(200); 6949 format %{ "Jn$cop skip\n\t" 6950 "MOVSD $dst,$src\t# float\n" 6951 "skip:" %} 6952 ins_encode %{ 6953 Label skip; 6954 // Invert sense of branch from sense of CMOV 6955 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 
6956 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6957 __ bind(skip); 6958 %} 6959 ins_pipe( pipe_slow ); 6960 %} 6961 6962 // unsigned version 6963 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ 6964 predicate (UseSSE>=1); 6965 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6966 ins_cost(200); 6967 format %{ "Jn$cop skip\n\t" 6968 "MOVSS $dst,$src\t# float\n" 6969 "skip:" %} 6970 ins_encode %{ 6971 Label skip; 6972 // Invert sense of branch from sense of CMOV 6973 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6974 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6975 __ bind(skip); 6976 %} 6977 ins_pipe( pipe_slow ); 6978 %} 6979 6980 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ 6981 predicate (UseSSE>=1); 6982 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6983 ins_cost(200); 6984 expand %{ 6985 fcmovF_regU(cop, cr, dst, src); 6986 %} 6987 %} 6988 6989 // unsigned version 6990 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ 6991 predicate (UseSSE>=2); 6992 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6993 ins_cost(200); 6994 format %{ "Jn$cop skip\n\t" 6995 "MOVSD $dst,$src\t# float\n" 6996 "skip:" %} 6997 ins_encode %{ 6998 Label skip; 6999 // Invert sense of branch from sense of CMOV 7000 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 7001 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 7002 __ bind(skip); 7003 %} 7004 ins_pipe( pipe_slow ); 7005 %} 7006 7007 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ 7008 predicate (UseSSE>=2); 7009 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7010 ins_cost(200); 7011 expand %{ 7012 fcmovD_regU(cop, cr, dst, src); 7013 %} 7014 %} 7015 7016 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{ 7017 predicate(VM_Version::supports_cmov() ); 7018 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7019 ins_cost(200); 7020 format 
%{ "CMOV$cop $dst.lo,$src.lo\n\t" 7021 "CMOV$cop $dst.hi,$src.hi" %} 7022 opcode(0x0F,0x40); 7023 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 7024 ins_pipe( pipe_cmov_reg_long ); 7025 %} 7026 7027 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{ 7028 predicate(VM_Version::supports_cmov() ); 7029 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7030 ins_cost(200); 7031 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 7032 "CMOV$cop $dst.hi,$src.hi" %} 7033 opcode(0x0F,0x40); 7034 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 7035 ins_pipe( pipe_cmov_reg_long ); 7036 %} 7037 7038 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{ 7039 predicate(VM_Version::supports_cmov() ); 7040 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7041 ins_cost(200); 7042 expand %{ 7043 cmovL_regU(cop, cr, dst, src); 7044 %} 7045 %} 7046 7047 //----------Arithmetic Instructions-------------------------------------------- 7048 //----------Addition Instructions---------------------------------------------- 7049 7050 // Integer Addition Instructions 7051 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7052 match(Set dst (AddI dst src)); 7053 effect(KILL cr); 7054 7055 size(2); 7056 format %{ "ADD $dst,$src" %} 7057 opcode(0x03); 7058 ins_encode( OpcP, RegReg( dst, src) ); 7059 ins_pipe( ialu_reg_reg ); 7060 %} 7061 7062 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7063 match(Set dst (AddI dst src)); 7064 effect(KILL cr); 7065 7066 format %{ "ADD $dst,$src" %} 7067 opcode(0x81, 0x00); /* /0 id */ 7068 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7069 ins_pipe( ialu_reg ); 7070 %} 7071 7072 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{ 7073 predicate(UseIncDec); 7074 match(Set dst (AddI dst src)); 7075 effect(KILL cr); 7076 7077 size(1); 7078 format %{ "INC $dst" %} 7079 opcode(0x40); /* */ 7080 
ins_encode( Opc_plus( primary, dst ) ); 7081 ins_pipe( ialu_reg ); 7082 %} 7083 7084 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ 7085 match(Set dst (AddI src0 src1)); 7086 ins_cost(110); 7087 7088 format %{ "LEA $dst,[$src0 + $src1]" %} 7089 opcode(0x8D); /* 0x8D /r */ 7090 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7091 ins_pipe( ialu_reg_reg ); 7092 %} 7093 7094 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ 7095 match(Set dst (AddP src0 src1)); 7096 ins_cost(110); 7097 7098 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} 7099 opcode(0x8D); /* 0x8D /r */ 7100 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7101 ins_pipe( ialu_reg_reg ); 7102 %} 7103 7104 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ 7105 predicate(UseIncDec); 7106 match(Set dst (AddI dst src)); 7107 effect(KILL cr); 7108 7109 size(1); 7110 format %{ "DEC $dst" %} 7111 opcode(0x48); /* */ 7112 ins_encode( Opc_plus( primary, dst ) ); 7113 ins_pipe( ialu_reg ); 7114 %} 7115 7116 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ 7117 match(Set dst (AddP dst src)); 7118 effect(KILL cr); 7119 7120 size(2); 7121 format %{ "ADD $dst,$src" %} 7122 opcode(0x03); 7123 ins_encode( OpcP, RegReg( dst, src) ); 7124 ins_pipe( ialu_reg_reg ); 7125 %} 7126 7127 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ 7128 match(Set dst (AddP dst src)); 7129 effect(KILL cr); 7130 7131 format %{ "ADD $dst,$src" %} 7132 opcode(0x81,0x00); /* Opcode 81 /0 id */ 7133 // ins_encode( RegImm( dst, src) ); 7134 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7135 ins_pipe( ialu_reg ); 7136 %} 7137 7138 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7139 match(Set dst (AddI dst (LoadI src))); 7140 effect(KILL cr); 7141 7142 ins_cost(150); 7143 format %{ "ADD $dst,$src" %} 7144 opcode(0x03); 7145 ins_encode( OpcP, RegMem( dst, src) ); 7146 ins_pipe( ialu_reg_mem ); 7147 %} 7148 7149 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 
7150 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7151 effect(KILL cr); 7152 7153 ins_cost(150); 7154 format %{ "ADD $dst,$src" %} 7155 opcode(0x01); /* Opcode 01 /r */ 7156 ins_encode( OpcP, RegMem( src, dst ) ); 7157 ins_pipe( ialu_mem_reg ); 7158 %} 7159 7160 // Add Memory with Immediate 7161 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7162 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7163 effect(KILL cr); 7164 7165 ins_cost(125); 7166 format %{ "ADD $dst,$src" %} 7167 opcode(0x81); /* Opcode 81 /0 id */ 7168 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7169 ins_pipe( ialu_mem_imm ); 7170 %} 7171 7172 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{ 7173 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7174 effect(KILL cr); 7175 7176 ins_cost(125); 7177 format %{ "INC $dst" %} 7178 opcode(0xFF); /* Opcode FF /0 */ 7179 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7180 ins_pipe( ialu_mem_imm ); 7181 %} 7182 7183 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7184 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7185 effect(KILL cr); 7186 7187 ins_cost(125); 7188 format %{ "DEC $dst" %} 7189 opcode(0xFF); /* Opcode FF /1 */ 7190 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7191 ins_pipe( ialu_mem_imm ); 7192 %} 7193 7194 7195 instruct checkCastPP( eRegP dst ) %{ 7196 match(Set dst (CheckCastPP dst)); 7197 7198 size(0); 7199 format %{ "#checkcastPP of $dst" %} 7200 ins_encode( /*empty encoding*/ ); 7201 ins_pipe( empty ); 7202 %} 7203 7204 instruct castPP( eRegP dst ) %{ 7205 match(Set dst (CastPP dst)); 7206 format %{ "#castPP of $dst" %} 7207 ins_encode( /*empty encoding*/ ); 7208 ins_pipe( empty ); 7209 %} 7210 7211 instruct castII( rRegI dst ) %{ 7212 match(Set dst (CastII dst)); 7213 format %{ "#castII of $dst" %} 7214 ins_encode( /*empty encoding*/ ); 7215 ins_cost(0); 7216 ins_pipe( empty ); 7217 %} 7218 7219 instruct castLL( eRegL dst ) %{ 7220 match(Set dst (CastLL dst)); 7221 format %{ 
"#castLL of $dst" %} 7222 ins_encode( /*empty encoding*/ ); 7223 ins_cost(0); 7224 ins_pipe( empty ); 7225 %} 7226 7227 instruct castFF( regF dst ) %{ 7228 predicate(UseSSE >= 1); 7229 match(Set dst (CastFF dst)); 7230 format %{ "#castFF of $dst" %} 7231 ins_encode( /*empty encoding*/ ); 7232 ins_cost(0); 7233 ins_pipe( empty ); 7234 %} 7235 7236 instruct castDD( regD dst ) %{ 7237 predicate(UseSSE >= 2); 7238 match(Set dst (CastDD dst)); 7239 format %{ "#castDD of $dst" %} 7240 ins_encode( /*empty encoding*/ ); 7241 ins_cost(0); 7242 ins_pipe( empty ); 7243 %} 7244 7245 instruct castFF_PR( regFPR dst ) %{ 7246 predicate(UseSSE < 1); 7247 match(Set dst (CastFF dst)); 7248 format %{ "#castFF of $dst" %} 7249 ins_encode( /*empty encoding*/ ); 7250 ins_cost(0); 7251 ins_pipe( empty ); 7252 %} 7253 7254 instruct castDD_PR( regDPR dst ) %{ 7255 predicate(UseSSE < 2); 7256 match(Set dst (CastDD dst)); 7257 format %{ "#castDD of $dst" %} 7258 ins_encode( /*empty encoding*/ ); 7259 ins_cost(0); 7260 ins_pipe( empty ); 7261 %} 7262 7263 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7264 7265 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7266 predicate(VM_Version::supports_cx8()); 7267 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7268 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval))); 7269 effect(KILL cr, KILL oldval); 7270 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7271 "MOV $res,0\n\t" 7272 "JNE,s fail\n\t" 7273 "MOV $res,1\n" 7274 "fail:" %} 7275 ins_encode( enc_cmpxchg8(mem_ptr), 7276 enc_flags_ne_to_boolean(res) ); 7277 ins_pipe( pipe_cmpxchg ); 7278 %} 7279 7280 instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7281 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 7282 match(Set res (WeakCompareAndSwapP mem_ptr 
(Binary oldval newval))); 7283 effect(KILL cr, KILL oldval); 7284 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7285 "MOV $res,0\n\t" 7286 "JNE,s fail\n\t" 7287 "MOV $res,1\n" 7288 "fail:" %} 7289 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7290 ins_pipe( pipe_cmpxchg ); 7291 %} 7292 7293 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7294 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval))); 7295 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval))); 7296 effect(KILL cr, KILL oldval); 7297 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7298 "MOV $res,0\n\t" 7299 "JNE,s fail\n\t" 7300 "MOV $res,1\n" 7301 "fail:" %} 7302 ins_encode( enc_cmpxchgb(mem_ptr), 7303 enc_flags_ne_to_boolean(res) ); 7304 ins_pipe( pipe_cmpxchg ); 7305 %} 7306 7307 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7308 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval))); 7309 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval))); 7310 effect(KILL cr, KILL oldval); 7311 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7312 "MOV $res,0\n\t" 7313 "JNE,s fail\n\t" 7314 "MOV $res,1\n" 7315 "fail:" %} 7316 ins_encode( enc_cmpxchgw(mem_ptr), 7317 enc_flags_ne_to_boolean(res) ); 7318 ins_pipe( pipe_cmpxchg ); 7319 %} 7320 7321 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7322 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 7323 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval))); 7324 effect(KILL cr, KILL oldval); 7325 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7326 "MOV $res,0\n\t" 7327 "JNE,s fail\n\t" 7328 "MOV 
$res,1\n" 7329 "fail:" %} 7330 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7331 ins_pipe( pipe_cmpxchg ); 7332 %} 7333 7334 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7335 predicate(VM_Version::supports_cx8()); 7336 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval))); 7337 effect(KILL cr); 7338 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7339 ins_encode( enc_cmpxchg8(mem_ptr) ); 7340 ins_pipe( pipe_cmpxchg ); 7341 %} 7342 7343 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7344 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); 7345 effect(KILL cr); 7346 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7347 ins_encode( enc_cmpxchg(mem_ptr) ); 7348 ins_pipe( pipe_cmpxchg ); 7349 %} 7350 7351 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7352 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval))); 7353 effect(KILL cr); 7354 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7355 ins_encode( enc_cmpxchgb(mem_ptr) ); 7356 ins_pipe( pipe_cmpxchg ); 7357 %} 7358 7359 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7360 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval))); 7361 effect(KILL cr); 7362 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7363 ins_encode( enc_cmpxchgw(mem_ptr) ); 7364 ins_pipe( pipe_cmpxchg ); 7365 %} 7366 7367 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7368 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval))); 7369 effect(KILL cr); 7370 format %{ "CMPXCHG 
[$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7371 ins_encode( enc_cmpxchg(mem_ptr) ); 7372 ins_pipe( pipe_cmpxchg ); 7373 %} 7374 7375 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7376 predicate(n->as_LoadStore()->result_not_used()); 7377 match(Set dummy (GetAndAddB mem add)); 7378 effect(KILL cr); 7379 format %{ "ADDB [$mem],$add" %} 7380 ins_encode %{ 7381 __ lock(); 7382 __ addb($mem$$Address, $add$$constant); 7383 %} 7384 ins_pipe( pipe_cmpxchg ); 7385 %} 7386 7387 // Important to match to xRegI: only 8-bit regs. 7388 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{ 7389 match(Set newval (GetAndAddB mem newval)); 7390 effect(KILL cr); 7391 format %{ "XADDB [$mem],$newval" %} 7392 ins_encode %{ 7393 __ lock(); 7394 __ xaddb($mem$$Address, $newval$$Register); 7395 %} 7396 ins_pipe( pipe_cmpxchg ); 7397 %} 7398 7399 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7400 predicate(n->as_LoadStore()->result_not_used()); 7401 match(Set dummy (GetAndAddS mem add)); 7402 effect(KILL cr); 7403 format %{ "ADDS [$mem],$add" %} 7404 ins_encode %{ 7405 __ lock(); 7406 __ addw($mem$$Address, $add$$constant); 7407 %} 7408 ins_pipe( pipe_cmpxchg ); 7409 %} 7410 7411 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{ 7412 match(Set newval (GetAndAddS mem newval)); 7413 effect(KILL cr); 7414 format %{ "XADDS [$mem],$newval" %} 7415 ins_encode %{ 7416 __ lock(); 7417 __ xaddw($mem$$Address, $newval$$Register); 7418 %} 7419 ins_pipe( pipe_cmpxchg ); 7420 %} 7421 7422 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7423 predicate(n->as_LoadStore()->result_not_used()); 7424 match(Set dummy (GetAndAddI mem add)); 7425 effect(KILL cr); 7426 format %{ "ADDL [$mem],$add" %} 7427 ins_encode %{ 7428 __ lock(); 7429 __ addl($mem$$Address, $add$$constant); 7430 %} 7431 ins_pipe( pipe_cmpxchg ); 7432 %} 7433 7434 instruct xaddI( memory mem, rRegI 
newval, eFlagsReg cr) %{ 7435 match(Set newval (GetAndAddI mem newval)); 7436 effect(KILL cr); 7437 format %{ "XADDL [$mem],$newval" %} 7438 ins_encode %{ 7439 __ lock(); 7440 __ xaddl($mem$$Address, $newval$$Register); 7441 %} 7442 ins_pipe( pipe_cmpxchg ); 7443 %} 7444 7445 // Important to match to xRegI: only 8-bit regs. 7446 instruct xchgB( memory mem, xRegI newval) %{ 7447 match(Set newval (GetAndSetB mem newval)); 7448 format %{ "XCHGB $newval,[$mem]" %} 7449 ins_encode %{ 7450 __ xchgb($newval$$Register, $mem$$Address); 7451 %} 7452 ins_pipe( pipe_cmpxchg ); 7453 %} 7454 7455 instruct xchgS( memory mem, rRegI newval) %{ 7456 match(Set newval (GetAndSetS mem newval)); 7457 format %{ "XCHGW $newval,[$mem]" %} 7458 ins_encode %{ 7459 __ xchgw($newval$$Register, $mem$$Address); 7460 %} 7461 ins_pipe( pipe_cmpxchg ); 7462 %} 7463 7464 instruct xchgI( memory mem, rRegI newval) %{ 7465 match(Set newval (GetAndSetI mem newval)); 7466 format %{ "XCHGL $newval,[$mem]" %} 7467 ins_encode %{ 7468 __ xchgl($newval$$Register, $mem$$Address); 7469 %} 7470 ins_pipe( pipe_cmpxchg ); 7471 %} 7472 7473 instruct xchgP( memory mem, pRegP newval) %{ 7474 match(Set newval (GetAndSetP mem newval)); 7475 format %{ "XCHGL $newval,[$mem]" %} 7476 ins_encode %{ 7477 __ xchgl($newval$$Register, $mem$$Address); 7478 %} 7479 ins_pipe( pipe_cmpxchg ); 7480 %} 7481 7482 //----------Subtraction Instructions------------------------------------------- 7483 7484 // Integer Subtraction Instructions 7485 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7486 match(Set dst (SubI dst src)); 7487 effect(KILL cr); 7488 7489 size(2); 7490 format %{ "SUB $dst,$src" %} 7491 opcode(0x2B); 7492 ins_encode( OpcP, RegReg( dst, src) ); 7493 ins_pipe( ialu_reg_reg ); 7494 %} 7495 7496 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7497 match(Set dst (SubI dst src)); 7498 effect(KILL cr); 7499 7500 format %{ "SUB $dst,$src" %} 7501 opcode(0x81,0x05); /* Opcode 81 /5 */ 7502 // ins_encode( 
RegImm( dst, src) ); 7503 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7504 ins_pipe( ialu_reg ); 7505 %} 7506 7507 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7508 match(Set dst (SubI dst (LoadI src))); 7509 effect(KILL cr); 7510 7511 ins_cost(150); 7512 format %{ "SUB $dst,$src" %} 7513 opcode(0x2B); 7514 ins_encode( OpcP, RegMem( dst, src) ); 7515 ins_pipe( ialu_reg_mem ); 7516 %} 7517 7518 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7519 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 7520 effect(KILL cr); 7521 7522 ins_cost(150); 7523 format %{ "SUB $dst,$src" %} 7524 opcode(0x29); /* Opcode 29 /r */ 7525 ins_encode( OpcP, RegMem( src, dst ) ); 7526 ins_pipe( ialu_mem_reg ); 7527 %} 7528 7529 // Subtract from a pointer 7530 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{ 7531 match(Set dst (AddP dst (SubI zero src))); 7532 effect(KILL cr); 7533 7534 size(2); 7535 format %{ "SUB $dst,$src" %} 7536 opcode(0x2B); 7537 ins_encode( OpcP, RegReg( dst, src) ); 7538 ins_pipe( ialu_reg_reg ); 7539 %} 7540 7541 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{ 7542 match(Set dst (SubI zero dst)); 7543 effect(KILL cr); 7544 7545 size(2); 7546 format %{ "NEG $dst" %} 7547 opcode(0xF7,0x03); // Opcode F7 /3 7548 ins_encode( OpcP, RegOpc( dst ) ); 7549 ins_pipe( ialu_reg ); 7550 %} 7551 7552 //----------Multiplication/Division Instructions------------------------------- 7553 // Integer Multiplication Instructions 7554 // Multiply Register 7555 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7556 match(Set dst (MulI dst src)); 7557 effect(KILL cr); 7558 7559 size(3); 7560 ins_cost(300); 7561 format %{ "IMUL $dst,$src" %} 7562 opcode(0xAF, 0x0F); 7563 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 7564 ins_pipe( ialu_reg_reg_alu0 ); 7565 %} 7566 7567 // Multiply 32-bit Immediate 7568 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ 7569 match(Set dst (MulI src 
imm)); 7570 effect(KILL cr); 7571 7572 ins_cost(300); 7573 format %{ "IMUL $dst,$src,$imm" %} 7574 opcode(0x69); /* 69 /r id */ 7575 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 7576 ins_pipe( ialu_reg_reg_alu0 ); 7577 %} 7578 7579 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 7580 match(Set dst src); 7581 effect(KILL cr); 7582 7583 // Note that this is artificially increased to make it more expensive than loadConL 7584 ins_cost(250); 7585 format %{ "MOV EAX,$src\t// low word only" %} 7586 opcode(0xB8); 7587 ins_encode( LdImmL_Lo(dst, src) ); 7588 ins_pipe( ialu_reg_fat ); 7589 %} 7590 7591 // Multiply by 32-bit Immediate, taking the shifted high order results 7592 // (special case for shift by 32) 7593 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 7594 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7595 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7596 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7597 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7598 effect(USE src1, KILL cr); 7599 7600 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7601 ins_cost(0*100 + 1*400 - 150); 7602 format %{ "IMUL EDX:EAX,$src1" %} 7603 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7604 ins_pipe( pipe_slow ); 7605 %} 7606 7607 // Multiply by 32-bit Immediate, taking the shifted high order results 7608 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 7609 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7610 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7611 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7612 
_kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7613 effect(USE src1, KILL cr); 7614 7615 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7616 ins_cost(1*100 + 1*400 - 150); 7617 format %{ "IMUL EDX:EAX,$src1\n\t" 7618 "SAR EDX,$cnt-32" %} 7619 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7620 ins_pipe( pipe_slow ); 7621 %} 7622 7623 // Multiply Memory 32-bit Immediate 7624 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ 7625 match(Set dst (MulI (LoadI src) imm)); 7626 effect(KILL cr); 7627 7628 ins_cost(300); 7629 format %{ "IMUL $dst,$src,$imm" %} 7630 opcode(0x69); /* 69 /r id */ 7631 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); 7632 ins_pipe( ialu_reg_mem_alu0 ); 7633 %} 7634 7635 // Multiply Memory 7636 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ 7637 match(Set dst (MulI dst (LoadI src))); 7638 effect(KILL cr); 7639 7640 ins_cost(350); 7641 format %{ "IMUL $dst,$src" %} 7642 opcode(0xAF, 0x0F); 7643 ins_encode( OpcS, OpcP, RegMem( dst, src) ); 7644 ins_pipe( ialu_reg_mem_alu0 ); 7645 %} 7646 7647 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr) 7648 %{ 7649 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3))); 7650 effect(KILL cr, KILL src2); 7651 7652 expand %{ mulI_eReg(dst, src1, cr); 7653 mulI_eReg(src2, src3, cr); 7654 addI_eReg(dst, src2, cr); %} 7655 %} 7656 7657 // Multiply Register Int to Long 7658 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 7659 // Basic Idea: long = (long)int * (long)int 7660 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 7661 effect(DEF dst, USE src, USE src1, KILL flags); 7662 7663 ins_cost(300); 7664 format %{ "IMUL $dst,$src1" %} 7665 7666 ins_encode( long_int_multiply( dst, src1 ) ); 7667 ins_pipe( ialu_reg_reg_alu0 ); 7668 %} 7669 7670 instruct mulIS_eReg(eADXRegL dst, 
immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 7671 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 7672 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 7673 effect(KILL flags); 7674 7675 ins_cost(300); 7676 format %{ "MUL $dst,$src1" %} 7677 7678 ins_encode( long_uint_multiply(dst, src1) ); 7679 ins_pipe( ialu_reg_reg_alu0 ); 7680 %} 7681 7682 // Multiply Register Long 7683 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7684 match(Set dst (MulL dst src)); 7685 effect(KILL cr, TEMP tmp); 7686 ins_cost(4*100+3*400); 7687 // Basic idea: lo(result) = lo(x_lo * y_lo) 7688 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 7689 format %{ "MOV $tmp,$src.lo\n\t" 7690 "IMUL $tmp,EDX\n\t" 7691 "MOV EDX,$src.hi\n\t" 7692 "IMUL EDX,EAX\n\t" 7693 "ADD $tmp,EDX\n\t" 7694 "MUL EDX:EAX,$src.lo\n\t" 7695 "ADD EDX,$tmp" %} 7696 ins_encode( long_multiply( dst, src, tmp ) ); 7697 ins_pipe( pipe_slow ); 7698 %} 7699 7700 // Multiply Register Long where the left operand's high 32 bits are zero 7701 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7702 predicate(is_operand_hi32_zero(n->in(1))); 7703 match(Set dst (MulL dst src)); 7704 effect(KILL cr, TEMP tmp); 7705 ins_cost(2*100+2*400); 7706 // Basic idea: lo(result) = lo(x_lo * y_lo) 7707 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 7708 format %{ "MOV $tmp,$src.hi\n\t" 7709 "IMUL $tmp,EAX\n\t" 7710 "MUL EDX:EAX,$src.lo\n\t" 7711 "ADD EDX,$tmp" %} 7712 ins_encode %{ 7713 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 7714 __ imull($tmp$$Register, rax); 7715 __ mull($src$$Register); 7716 __ addl(rdx, $tmp$$Register); 7717 %} 7718 ins_pipe( pipe_slow ); 7719 %} 7720 7721 // Multiply Register Long where the right operand's high 32 bits are zero 7722 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7723 
predicate(is_operand_hi32_zero(n->in(2))); 7724 match(Set dst (MulL dst src)); 7725 effect(KILL cr, TEMP tmp); 7726 ins_cost(2*100+2*400); 7727 // Basic idea: lo(result) = lo(x_lo * y_lo) 7728 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 7729 format %{ "MOV $tmp,$src.lo\n\t" 7730 "IMUL $tmp,EDX\n\t" 7731 "MUL EDX:EAX,$src.lo\n\t" 7732 "ADD EDX,$tmp" %} 7733 ins_encode %{ 7734 __ movl($tmp$$Register, $src$$Register); 7735 __ imull($tmp$$Register, rdx); 7736 __ mull($src$$Register); 7737 __ addl(rdx, $tmp$$Register); 7738 %} 7739 ins_pipe( pipe_slow ); 7740 %} 7741 7742 // Multiply Register Long where the left and the right operands' high 32 bits are zero 7743 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ 7744 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); 7745 match(Set dst (MulL dst src)); 7746 effect(KILL cr); 7747 ins_cost(1*400); 7748 // Basic idea: lo(result) = lo(x_lo * y_lo) 7749 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 7750 format %{ "MUL EDX:EAX,$src.lo\n\t" %} 7751 ins_encode %{ 7752 __ mull($src$$Register); 7753 %} 7754 ins_pipe( pipe_slow ); 7755 %} 7756 7757 // Multiply Register Long by small constant 7758 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ 7759 match(Set dst (MulL dst src)); 7760 effect(KILL cr, TEMP tmp); 7761 ins_cost(2*100+2*400); 7762 size(12); 7763 // Basic idea: lo(result) = lo(src * EAX) 7764 // hi(result) = hi(src * EAX) + lo(src * EDX) 7765 format %{ "IMUL $tmp,EDX,$src\n\t" 7766 "MOV EDX,$src\n\t" 7767 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 7768 "ADD EDX,$tmp" %} 7769 ins_encode( long_multiply_con( dst, src, tmp ) ); 7770 ins_pipe( pipe_slow ); 7771 %} 7772 7773 // Integer DIV with Register 7774 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7775 match(Set rax (DivI rax div)); 7776 effect(KILL rdx, KILL cr); 
7777 size(26); 7778 ins_cost(30*100+10*100); 7779 format %{ "CMP EAX,0x80000000\n\t" 7780 "JNE,s normal\n\t" 7781 "XOR EDX,EDX\n\t" 7782 "CMP ECX,-1\n\t" 7783 "JE,s done\n" 7784 "normal: CDQ\n\t" 7785 "IDIV $div\n\t" 7786 "done:" %} 7787 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7788 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7789 ins_pipe( ialu_reg_reg_alu0 ); 7790 %} 7791 7792 // Divide Register Long 7793 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ 7794 match(Set dst (DivL src1 src2)); 7795 effect(CALL); 7796 ins_cost(10000); 7797 format %{ "PUSH $src1.hi\n\t" 7798 "PUSH $src1.lo\n\t" 7799 "PUSH $src2.hi\n\t" 7800 "PUSH $src2.lo\n\t" 7801 "CALL SharedRuntime::ldiv\n\t" 7802 "ADD ESP,16" %} 7803 ins_encode( long_div(src1,src2) ); 7804 ins_pipe( pipe_slow ); 7805 %} 7806 7807 // Integer DIVMOD with Register, both quotient and mod results 7808 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7809 match(DivModI rax div); 7810 effect(KILL cr); 7811 size(26); 7812 ins_cost(30*100+10*100); 7813 format %{ "CMP EAX,0x80000000\n\t" 7814 "JNE,s normal\n\t" 7815 "XOR EDX,EDX\n\t" 7816 "CMP ECX,-1\n\t" 7817 "JE,s done\n" 7818 "normal: CDQ\n\t" 7819 "IDIV $div\n\t" 7820 "done:" %} 7821 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7822 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7823 ins_pipe( pipe_slow ); 7824 %} 7825 7826 // Integer MOD with Register 7827 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ 7828 match(Set rdx (ModI rax div)); 7829 effect(KILL rax, KILL cr); 7830 7831 size(26); 7832 ins_cost(300); 7833 format %{ "CDQ\n\t" 7834 "IDIV $div" %} 7835 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7836 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7837 ins_pipe( ialu_reg_reg_alu0 ); 7838 %} 7839 7840 // Remainder Register Long 7841 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ 7842 match(Set dst (ModL src1 src2)); 7843 effect(CALL); 7844 ins_cost(10000); 7845 format %{ "PUSH $src1.hi\n\t" 7846 "PUSH 
$src1.lo\n\t"
         "PUSH $src2.hi\n\t"
         "PUSH $src2.lo\n\t"
         "CALL SharedRuntime::lrem\n\t"
         "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
// 64-bit division by a 32-bit immediate constant in EDX:EAX.  The divisor is
// asserted to be neither 0, -1 nor min_jint, so no special cases remain.  The
// positive magnitude of the divisor is used with unsigned DIV; the sign of
// the quotient is fixed up at the end when the divisor was negative.
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    // Divide by the positive magnitude; the sign is patched up below.
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      // Divisor was negative: negate the 64-bit quotient.
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
// Same unsigned-division trick as divL_eReg_imm32 above, but only the
// remainder is kept; the remainder takes the sign of the dividend.
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    // Divide by the positive magnitude; remainder sign follows the dividend.
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
instruct
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one, memory operand
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate, memory operand
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable (shift count in CL)
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable (shift count in CL)
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2, matched from the (src1 ^ -1) & src2 ideal graph.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from (0 - src) & src.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to lowest set bit, matched from (src + -1) ^ src.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, matched from (src + -1) & src.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer reinterpreted as an int (CastP2X)
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand (building blocks used by the rotate match rules below)
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Only matches when the two shift amounts sum to 0 mod 32 (true rotate).
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand (building blocks used by the rotate match rules below)
instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Only matches when the two shift amounts sum to 0 mod 32 (true rotate).
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1 (bitwise NOT; does not touch flags)
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06); /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set
dst (StoreI dst (XorI (LoadI dst) src))); 8604 effect(KILL cr); 8605 8606 ins_cost(125); 8607 format %{ "XOR $dst,$src" %} 8608 opcode(0x81,0x6); /* Opcode 81 /6 id */ 8609 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8610 ins_pipe( ialu_mem_imm ); 8611 %} 8612 8613 //----------Convert Int to Boolean--------------------------------------------- 8614 8615 instruct movI_nocopy(rRegI dst, rRegI src) %{ 8616 effect( DEF dst, USE src ); 8617 format %{ "MOV $dst,$src" %} 8618 ins_encode( enc_Copy( dst, src) ); 8619 ins_pipe( ialu_reg_reg ); 8620 %} 8621 8622 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8623 effect( USE_DEF dst, USE src, KILL cr ); 8624 8625 size(4); 8626 format %{ "NEG $dst\n\t" 8627 "ADC $dst,$src" %} 8628 ins_encode( neg_reg(dst), 8629 OpcRegReg(0x13,dst,src) ); 8630 ins_pipe( ialu_reg_reg_long ); 8631 %} 8632 8633 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8634 match(Set dst (Conv2B src)); 8635 8636 expand %{ 8637 movI_nocopy(dst,src); 8638 ci2b(dst,src,cr); 8639 %} 8640 %} 8641 8642 instruct movP_nocopy(rRegI dst, eRegP src) %{ 8643 effect( DEF dst, USE src ); 8644 format %{ "MOV $dst,$src" %} 8645 ins_encode( enc_Copy( dst, src) ); 8646 ins_pipe( ialu_reg_reg ); 8647 %} 8648 8649 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8650 effect( USE_DEF dst, USE src, KILL cr ); 8651 format %{ "NEG $dst\n\t" 8652 "ADC $dst,$src" %} 8653 ins_encode( neg_reg(dst), 8654 OpcRegReg(0x13,dst,src) ); 8655 ins_pipe( ialu_reg_reg_long ); 8656 %} 8657 8658 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8659 match(Set dst (Conv2B src)); 8660 8661 expand %{ 8662 movP_nocopy(dst,src); 8663 cp2b(dst,src,cr); 8664 %} 8665 %} 8666 8667 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ 8668 match(Set dst (CmpLTMask p q)); 8669 effect(KILL cr); 8670 ins_cost(400); 8671 8672 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination 8673 format %{ "XOR $dst,$dst\n\t" 8674 
"CMP $p,$q\n\t" 8675 "SETlt $dst\n\t" 8676 "NEG $dst" %} 8677 ins_encode %{ 8678 Register Rp = $p$$Register; 8679 Register Rq = $q$$Register; 8680 Register Rd = $dst$$Register; 8681 Label done; 8682 __ xorl(Rd, Rd); 8683 __ cmpl(Rp, Rq); 8684 __ setb(Assembler::less, Rd); 8685 __ negl(Rd); 8686 %} 8687 8688 ins_pipe(pipe_slow); 8689 %} 8690 8691 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{ 8692 match(Set dst (CmpLTMask dst zero)); 8693 effect(DEF dst, KILL cr); 8694 ins_cost(100); 8695 8696 format %{ "SAR $dst,31\t# cmpLTMask0" %} 8697 ins_encode %{ 8698 __ sarl($dst$$Register, 31); 8699 %} 8700 ins_pipe(ialu_reg); 8701 %} 8702 8703 /* better to save a register than avoid a branch */ 8704 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8705 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); 8706 effect(KILL cr); 8707 ins_cost(400); 8708 format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t" 8709 "JGE done\n\t" 8710 "ADD $p,$y\n" 8711 "done: " %} 8712 ins_encode %{ 8713 Register Rp = $p$$Register; 8714 Register Rq = $q$$Register; 8715 Register Ry = $y$$Register; 8716 Label done; 8717 __ subl(Rp, Rq); 8718 __ jccb(Assembler::greaterEqual, done); 8719 __ addl(Rp, Ry); 8720 __ bind(done); 8721 %} 8722 8723 ins_pipe(pipe_cmplt); 8724 %} 8725 8726 /* better to save a register than avoid a branch */ 8727 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8728 match(Set y (AndI (CmpLTMask p q) y)); 8729 effect(KILL cr); 8730 8731 ins_cost(300); 8732 8733 format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t" 8734 "JLT done\n\t" 8735 "XORL $y, $y\n" 8736 "done: " %} 8737 ins_encode %{ 8738 Register Rp = $p$$Register; 8739 Register Rq = $q$$Register; 8740 Register Ry = $y$$Register; 8741 Label done; 8742 __ cmpl(Rp, Rq); 8743 __ jccb(Assembler::less, done); 8744 __ xorl(Ry, Ry); 8745 __ bind(done); 8746 %} 8747 8748 ins_pipe(pipe_cmplt); 8749 %} 8750 8751 /* If I enable this, I encourage spilling in the inner loop of compress. 
   instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These set the flags register from the arithmetic so a following branch can
// test the overflow (OF) condition.

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtraction overflow check: CMP sets the same flags without clobbering op1.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Negation overflow check (0 - op2): NEG sets OF when op2 == min_jint.
instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Integer Absolute Instructions
// Branch-free abs: tmp = sign mask of src; dst = (src ^ tmp) - tmp.
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
  %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);
    __ subl($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

//----------Long Instructions------------------------------------------------
// 64-bit arithmetic is done in 32-bit halves: low half first, then the high
// half with the carry/borrow (ADC/SBB) from the low half.
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate Long (0 - dst)
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode(
neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Long (64-bit) variants are built from two 32-bit BMI1 operations,
// one per register half.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // Address of the high 32-bit word: same base/index/scale, disp + 4.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI long: isolate the lowest set bit.  The high-half BLSI is only applied
// when the low half was zero (ZF set by the low-half BLSI).
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the high 32-bit word: same base/index/scale, disp + 4.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK long: mask up to and including the lowest set bit.  The high half is
// processed only when the low half carried (CF set means low half was zero).
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the high 32-bit word: same base/index/scale, disp + 4.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR long: clear the lowest set bit.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
9133 "BLSRL $dst.lo, $src.lo\n\t" 9134 "JNC done\n\t" 9135 "BLSRL $dst.hi, $src.hi\n" 9136 "done:" 9137 %} 9138 9139 ins_encode %{ 9140 Label done; 9141 Register Rdst = $dst$$Register; 9142 Register Rsrc = $src$$Register; 9143 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9144 __ blsrl(Rdst, Rsrc); 9145 __ jccb(Assembler::carryClear, done); 9146 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9147 __ bind(done); 9148 %} 9149 9150 ins_pipe(ialu_reg); 9151 %} 9152 9153 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9154 %{ 9155 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); 9156 predicate(UseBMI1Instructions); 9157 effect(KILL cr, TEMP dst); 9158 9159 ins_cost(125); 9160 format %{ "MOVL $dst.hi, $src+4\n\t" 9161 "BLSRL $dst.lo, $src\n\t" 9162 "JNC done\n\t" 9163 "BLSRL $dst.hi, $src+4\n" 9164 "done:" 9165 %} 9166 9167 ins_encode %{ 9168 Label done; 9169 Register Rdst = $dst$$Register; 9170 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9171 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9172 __ blsrl(Rdst, $src$$Address); 9173 __ jccb(Assembler::carryClear, done); 9174 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9175 __ bind(done); 9176 %} 9177 9178 ins_pipe(ialu_reg_mem); 9179 %} 9180 9181 // Or Long Register with Register 9182 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9183 match(Set dst (OrL dst src)); 9184 effect(KILL cr); 9185 format %{ "OR $dst.lo,$src.lo\n\t" 9186 "OR $dst.hi,$src.hi" %} 9187 opcode(0x0B,0x0B); 9188 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9189 ins_pipe( ialu_reg_reg_long ); 9190 %} 9191 9192 // Or Long Register with Immediate 9193 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9194 match(Set dst (OrL dst src)); 9195 effect(KILL cr); 9196 format %{ "OR $dst.lo,$src.lo\n\t" 9197 "OR $dst.hi,$src.hi" %} 9198 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9199 ins_encode( Long_OpcSErm_Lo( dst, src ), 
Long_OpcSErm_Hi( dst, src ) ); 9200 ins_pipe( ialu_reg_long ); 9201 %} 9202 9203 // Or Long Register with Memory 9204 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9205 match(Set dst (OrL dst (LoadL mem))); 9206 effect(KILL cr); 9207 ins_cost(125); 9208 format %{ "OR $dst.lo,$mem\n\t" 9209 "OR $dst.hi,$mem+4" %} 9210 opcode(0x0B,0x0B); 9211 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9212 ins_pipe( ialu_reg_long_mem ); 9213 %} 9214 9215 // Xor Long Register with Register 9216 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9217 match(Set dst (XorL dst src)); 9218 effect(KILL cr); 9219 format %{ "XOR $dst.lo,$src.lo\n\t" 9220 "XOR $dst.hi,$src.hi" %} 9221 opcode(0x33,0x33); 9222 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9223 ins_pipe( ialu_reg_reg_long ); 9224 %} 9225 9226 // Xor Long Register with Immediate -1 9227 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9228 match(Set dst (XorL dst imm)); 9229 format %{ "NOT $dst.lo\n\t" 9230 "NOT $dst.hi" %} 9231 ins_encode %{ 9232 __ notl($dst$$Register); 9233 __ notl(HIGH_FROM_LOW($dst$$Register)); 9234 %} 9235 ins_pipe( ialu_reg_long ); 9236 %} 9237 9238 // Xor Long Register with Immediate 9239 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9240 match(Set dst (XorL dst src)); 9241 effect(KILL cr); 9242 format %{ "XOR $dst.lo,$src.lo\n\t" 9243 "XOR $dst.hi,$src.hi" %} 9244 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9245 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9246 ins_pipe( ialu_reg_long ); 9247 %} 9248 9249 // Xor Long Register with Memory 9250 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9251 match(Set dst (XorL dst (LoadL mem))); 9252 effect(KILL cr); 9253 ins_cost(125); 9254 format %{ "XOR $dst.lo,$mem\n\t" 9255 "XOR $dst.hi,$mem+4" %} 9256 opcode(0x33,0x33); 9257 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9258 ins_pipe( ialu_reg_long_mem ); 
9259 %} 9260 9261 // Shift Left Long by 1 9262 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9263 predicate(UseNewLongLShift); 9264 match(Set dst (LShiftL dst cnt)); 9265 effect(KILL cr); 9266 ins_cost(100); 9267 format %{ "ADD $dst.lo,$dst.lo\n\t" 9268 "ADC $dst.hi,$dst.hi" %} 9269 ins_encode %{ 9270 __ addl($dst$$Register,$dst$$Register); 9271 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9272 %} 9273 ins_pipe( ialu_reg_long ); 9274 %} 9275 9276 // Shift Left Long by 2 9277 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9278 predicate(UseNewLongLShift); 9279 match(Set dst (LShiftL dst cnt)); 9280 effect(KILL cr); 9281 ins_cost(100); 9282 format %{ "ADD $dst.lo,$dst.lo\n\t" 9283 "ADC $dst.hi,$dst.hi\n\t" 9284 "ADD $dst.lo,$dst.lo\n\t" 9285 "ADC $dst.hi,$dst.hi" %} 9286 ins_encode %{ 9287 __ addl($dst$$Register,$dst$$Register); 9288 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9289 __ addl($dst$$Register,$dst$$Register); 9290 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9291 %} 9292 ins_pipe( ialu_reg_long ); 9293 %} 9294 9295 // Shift Left Long by 3 9296 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9297 predicate(UseNewLongLShift); 9298 match(Set dst (LShiftL dst cnt)); 9299 effect(KILL cr); 9300 ins_cost(100); 9301 format %{ "ADD $dst.lo,$dst.lo\n\t" 9302 "ADC $dst.hi,$dst.hi\n\t" 9303 "ADD $dst.lo,$dst.lo\n\t" 9304 "ADC $dst.hi,$dst.hi\n\t" 9305 "ADD $dst.lo,$dst.lo\n\t" 9306 "ADC $dst.hi,$dst.hi" %} 9307 ins_encode %{ 9308 __ addl($dst$$Register,$dst$$Register); 9309 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9310 __ addl($dst$$Register,$dst$$Register); 9311 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9312 __ addl($dst$$Register,$dst$$Register); 9313 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9314 %} 9315 ins_pipe( ialu_reg_long ); 9316 %} 9317 9318 // Shift Left 
Long by 1-31 9319 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9320 match(Set dst (LShiftL dst cnt)); 9321 effect(KILL cr); 9322 ins_cost(200); 9323 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9324 "SHL $dst.lo,$cnt" %} 9325 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9326 ins_encode( move_long_small_shift(dst,cnt) ); 9327 ins_pipe( ialu_reg_long ); 9328 %} 9329 9330 // Shift Left Long by 32-63 9331 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9332 match(Set dst (LShiftL dst cnt)); 9333 effect(KILL cr); 9334 ins_cost(300); 9335 format %{ "MOV $dst.hi,$dst.lo\n" 9336 "\tSHL $dst.hi,$cnt-32\n" 9337 "\tXOR $dst.lo,$dst.lo" %} 9338 opcode(0xC1, 0x4); /* C1 /4 ib */ 9339 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9340 ins_pipe( ialu_reg_long ); 9341 %} 9342 9343 // Shift Left Long by variable 9344 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9345 match(Set dst (LShiftL dst shift)); 9346 effect(KILL cr); 9347 ins_cost(500+200); 9348 size(17); 9349 format %{ "TEST $shift,32\n\t" 9350 "JEQ,s small\n\t" 9351 "MOV $dst.hi,$dst.lo\n\t" 9352 "XOR $dst.lo,$dst.lo\n" 9353 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9354 "SHL $dst.lo,$shift" %} 9355 ins_encode( shift_left_long( dst, shift ) ); 9356 ins_pipe( pipe_slow ); 9357 %} 9358 9359 // Shift Right Long by 1-31 9360 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9361 match(Set dst (URShiftL dst cnt)); 9362 effect(KILL cr); 9363 ins_cost(200); 9364 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9365 "SHR $dst.hi,$cnt" %} 9366 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9367 ins_encode( move_long_small_shift(dst,cnt) ); 9368 ins_pipe( ialu_reg_long ); 9369 %} 9370 9371 // Shift Right Long by 32-63 9372 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9373 match(Set dst (URShiftL dst cnt)); 9374 effect(KILL cr); 9375 ins_cost(300); 9376 format %{ "MOV $dst.lo,$dst.hi\n" 9377 "\tSHR $dst.lo,$cnt-32\n" 9378 "\tXOR 
$dst.hi,$dst.hi" %} 9379 opcode(0xC1, 0x5); /* C1 /5 ib */ 9380 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9381 ins_pipe( ialu_reg_long ); 9382 %} 9383 9384 // Shift Right Long by variable 9385 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9386 match(Set dst (URShiftL dst shift)); 9387 effect(KILL cr); 9388 ins_cost(600); 9389 size(17); 9390 format %{ "TEST $shift,32\n\t" 9391 "JEQ,s small\n\t" 9392 "MOV $dst.lo,$dst.hi\n\t" 9393 "XOR $dst.hi,$dst.hi\n" 9394 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9395 "SHR $dst.hi,$shift" %} 9396 ins_encode( shift_right_long( dst, shift ) ); 9397 ins_pipe( pipe_slow ); 9398 %} 9399 9400 // Shift Right Long by 1-31 9401 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9402 match(Set dst (RShiftL dst cnt)); 9403 effect(KILL cr); 9404 ins_cost(200); 9405 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9406 "SAR $dst.hi,$cnt" %} 9407 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9408 ins_encode( move_long_small_shift(dst,cnt) ); 9409 ins_pipe( ialu_reg_long ); 9410 %} 9411 9412 // Shift Right Long by 32-63 9413 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9414 match(Set dst (RShiftL dst cnt)); 9415 effect(KILL cr); 9416 ins_cost(300); 9417 format %{ "MOV $dst.lo,$dst.hi\n" 9418 "\tSAR $dst.lo,$cnt-32\n" 9419 "\tSAR $dst.hi,31" %} 9420 opcode(0xC1, 0x7); /* C1 /7 ib */ 9421 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9422 ins_pipe( ialu_reg_long ); 9423 %} 9424 9425 // Shift Right arithmetic Long by variable 9426 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9427 match(Set dst (RShiftL dst shift)); 9428 effect(KILL cr); 9429 ins_cost(600); 9430 size(18); 9431 format %{ "TEST $shift,32\n\t" 9432 "JEQ,s small\n\t" 9433 "MOV $dst.lo,$dst.hi\n\t" 9434 "SAR $dst.hi,31\n" 9435 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9436 "SAR $dst.hi,$shift" %} 9437 ins_encode( shift_right_arith_long( dst, shift ) ); 9438 ins_pipe( pipe_slow ); 9439 %} 9440 9441 
9442 //----------Double Instructions------------------------------------------------ 9443 // Double Math 9444 9445 // Compare & branch 9446 9447 // P6 version of float compare, sets condition codes in EFLAGS 9448 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9449 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9450 match(Set cr (CmpD src1 src2)); 9451 effect(KILL rax); 9452 ins_cost(150); 9453 format %{ "FLD $src1\n\t" 9454 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9455 "JNP exit\n\t" 9456 "MOV ah,1 // saw a NaN, set CF\n\t" 9457 "SAHF\n" 9458 "exit:\tNOP // avoid branch to branch" %} 9459 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9460 ins_encode( Push_Reg_DPR(src1), 9461 OpcP, RegOpc(src2), 9462 cmpF_P6_fixup ); 9463 ins_pipe( pipe_slow ); 9464 %} 9465 9466 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9467 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9468 match(Set cr (CmpD src1 src2)); 9469 ins_cost(150); 9470 format %{ "FLD $src1\n\t" 9471 "FUCOMIP ST,$src2 // P6 instruction" %} 9472 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9473 ins_encode( Push_Reg_DPR(src1), 9474 OpcP, RegOpc(src2)); 9475 ins_pipe( pipe_slow ); 9476 %} 9477 9478 // Compare & branch 9479 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9480 predicate(UseSSE<=1); 9481 match(Set cr (CmpD src1 src2)); 9482 effect(KILL rax); 9483 ins_cost(200); 9484 format %{ "FLD $src1\n\t" 9485 "FCOMp $src2\n\t" 9486 "FNSTSW AX\n\t" 9487 "TEST AX,0x400\n\t" 9488 "JZ,s flags\n\t" 9489 "MOV AH,1\t# unordered treat as LT\n" 9490 "flags:\tSAHF" %} 9491 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9492 ins_encode( Push_Reg_DPR(src1), 9493 OpcP, RegOpc(src2), 9494 fpu_flags); 9495 ins_pipe( pipe_slow ); 9496 %} 9497 9498 // Compare vs zero into -1,0,1 9499 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9500 predicate(UseSSE<=1); 9501 match(Set dst (CmpD3 src1 zero)); 9502 effect(KILL 
cr, KILL rax); 9503 ins_cost(280); 9504 format %{ "FTSTD $dst,$src1" %} 9505 opcode(0xE4, 0xD9); 9506 ins_encode( Push_Reg_DPR(src1), 9507 OpcS, OpcP, PopFPU, 9508 CmpF_Result(dst)); 9509 ins_pipe( pipe_slow ); 9510 %} 9511 9512 // Compare into -1,0,1 9513 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9514 predicate(UseSSE<=1); 9515 match(Set dst (CmpD3 src1 src2)); 9516 effect(KILL cr, KILL rax); 9517 ins_cost(300); 9518 format %{ "FCMPD $dst,$src1,$src2" %} 9519 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9520 ins_encode( Push_Reg_DPR(src1), 9521 OpcP, RegOpc(src2), 9522 CmpF_Result(dst)); 9523 ins_pipe( pipe_slow ); 9524 %} 9525 9526 // float compare and set condition codes in EFLAGS by XMM regs 9527 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9528 predicate(UseSSE>=2); 9529 match(Set cr (CmpD src1 src2)); 9530 ins_cost(145); 9531 format %{ "UCOMISD $src1,$src2\n\t" 9532 "JNP,s exit\n\t" 9533 "PUSHF\t# saw NaN, set CF\n\t" 9534 "AND [rsp], #0xffffff2b\n\t" 9535 "POPF\n" 9536 "exit:" %} 9537 ins_encode %{ 9538 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9539 emit_cmpfp_fixup(_masm); 9540 %} 9541 ins_pipe( pipe_slow ); 9542 %} 9543 9544 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9545 predicate(UseSSE>=2); 9546 match(Set cr (CmpD src1 src2)); 9547 ins_cost(100); 9548 format %{ "UCOMISD $src1,$src2" %} 9549 ins_encode %{ 9550 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9551 %} 9552 ins_pipe( pipe_slow ); 9553 %} 9554 9555 // float compare and set condition codes in EFLAGS by XMM regs 9556 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9557 predicate(UseSSE>=2); 9558 match(Set cr (CmpD src1 (LoadD src2))); 9559 ins_cost(145); 9560 format %{ "UCOMISD $src1,$src2\n\t" 9561 "JNP,s exit\n\t" 9562 "PUSHF\t# saw NaN, set CF\n\t" 9563 "AND [rsp], #0xffffff2b\n\t" 9564 "POPF\n" 9565 "exit:" %} 9566 ins_encode %{ 9567 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9568 
emit_cmpfp_fixup(_masm); 9569 %} 9570 ins_pipe( pipe_slow ); 9571 %} 9572 9573 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9574 predicate(UseSSE>=2); 9575 match(Set cr (CmpD src1 (LoadD src2))); 9576 ins_cost(100); 9577 format %{ "UCOMISD $src1,$src2" %} 9578 ins_encode %{ 9579 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9580 %} 9581 ins_pipe( pipe_slow ); 9582 %} 9583 9584 // Compare into -1,0,1 in XMM 9585 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9586 predicate(UseSSE>=2); 9587 match(Set dst (CmpD3 src1 src2)); 9588 effect(KILL cr); 9589 ins_cost(255); 9590 format %{ "UCOMISD $src1, $src2\n\t" 9591 "MOV $dst, #-1\n\t" 9592 "JP,s done\n\t" 9593 "JB,s done\n\t" 9594 "SETNE $dst\n\t" 9595 "MOVZB $dst, $dst\n" 9596 "done:" %} 9597 ins_encode %{ 9598 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9599 emit_cmpfp3(_masm, $dst$$Register); 9600 %} 9601 ins_pipe( pipe_slow ); 9602 %} 9603 9604 // Compare into -1,0,1 in XMM and memory 9605 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9606 predicate(UseSSE>=2); 9607 match(Set dst (CmpD3 src1 (LoadD src2))); 9608 effect(KILL cr); 9609 ins_cost(275); 9610 format %{ "UCOMISD $src1, $src2\n\t" 9611 "MOV $dst, #-1\n\t" 9612 "JP,s done\n\t" 9613 "JB,s done\n\t" 9614 "SETNE $dst\n\t" 9615 "MOVZB $dst, $dst\n" 9616 "done:" %} 9617 ins_encode %{ 9618 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9619 emit_cmpfp3(_masm, $dst$$Register); 9620 %} 9621 ins_pipe( pipe_slow ); 9622 %} 9623 9624 9625 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9626 predicate (UseSSE <=1); 9627 match(Set dst (SubD dst src)); 9628 9629 format %{ "FLD $src\n\t" 9630 "DSUBp $dst,ST" %} 9631 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9632 ins_cost(150); 9633 ins_encode( Push_Reg_DPR(src), 9634 OpcP, RegOpc(dst) ); 9635 ins_pipe( fpu_reg_reg ); 9636 %} 9637 9638 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9639 predicate (UseSSE <=1); 9640 
match(Set dst (RoundDouble (SubD src1 src2))); 9641 ins_cost(250); 9642 9643 format %{ "FLD $src2\n\t" 9644 "DSUB ST,$src1\n\t" 9645 "FSTP_D $dst\t# D-round" %} 9646 opcode(0xD8, 0x5); 9647 ins_encode( Push_Reg_DPR(src2), 9648 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9649 ins_pipe( fpu_mem_reg_reg ); 9650 %} 9651 9652 9653 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9654 predicate (UseSSE <=1); 9655 match(Set dst (SubD dst (LoadD src))); 9656 ins_cost(150); 9657 9658 format %{ "FLD $src\n\t" 9659 "DSUBp $dst,ST" %} 9660 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9661 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9662 OpcP, RegOpc(dst) ); 9663 ins_pipe( fpu_reg_mem ); 9664 %} 9665 9666 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9667 predicate (UseSSE<=1); 9668 match(Set dst (AbsD src)); 9669 ins_cost(100); 9670 format %{ "FABS" %} 9671 opcode(0xE1, 0xD9); 9672 ins_encode( OpcS, OpcP ); 9673 ins_pipe( fpu_reg_reg ); 9674 %} 9675 9676 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9677 predicate(UseSSE<=1); 9678 match(Set dst (NegD src)); 9679 ins_cost(100); 9680 format %{ "FCHS" %} 9681 opcode(0xE0, 0xD9); 9682 ins_encode( OpcS, OpcP ); 9683 ins_pipe( fpu_reg_reg ); 9684 %} 9685 9686 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9687 predicate(UseSSE<=1); 9688 match(Set dst (AddD dst src)); 9689 format %{ "FLD $src\n\t" 9690 "DADD $dst,ST" %} 9691 size(4); 9692 ins_cost(150); 9693 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9694 ins_encode( Push_Reg_DPR(src), 9695 OpcP, RegOpc(dst) ); 9696 ins_pipe( fpu_reg_reg ); 9697 %} 9698 9699 9700 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9701 predicate(UseSSE<=1); 9702 match(Set dst (RoundDouble (AddD src1 src2))); 9703 ins_cost(250); 9704 9705 format %{ "FLD $src2\n\t" 9706 "DADD ST,$src1\n\t" 9707 "FSTP_D $dst\t# D-round" %} 9708 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9709 ins_encode( Push_Reg_DPR(src2), 9710 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9711 ins_pipe( 
fpu_mem_reg_reg ); 9712 %} 9713 9714 9715 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9716 predicate(UseSSE<=1); 9717 match(Set dst (AddD dst (LoadD src))); 9718 ins_cost(150); 9719 9720 format %{ "FLD $src\n\t" 9721 "DADDp $dst,ST" %} 9722 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9723 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9724 OpcP, RegOpc(dst) ); 9725 ins_pipe( fpu_reg_mem ); 9726 %} 9727 9728 // add-to-memory 9729 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9730 predicate(UseSSE<=1); 9731 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9732 ins_cost(150); 9733 9734 format %{ "FLD_D $dst\n\t" 9735 "DADD ST,$src\n\t" 9736 "FST_D $dst" %} 9737 opcode(0xDD, 0x0); 9738 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9739 Opcode(0xD8), RegOpc(src), 9740 set_instruction_start, 9741 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9742 ins_pipe( fpu_reg_mem ); 9743 %} 9744 9745 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9746 predicate(UseSSE<=1); 9747 match(Set dst (AddD dst con)); 9748 ins_cost(125); 9749 format %{ "FLD1\n\t" 9750 "DADDp $dst,ST" %} 9751 ins_encode %{ 9752 __ fld1(); 9753 __ faddp($dst$$reg); 9754 %} 9755 ins_pipe(fpu_reg); 9756 %} 9757 9758 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9759 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9760 match(Set dst (AddD dst con)); 9761 ins_cost(200); 9762 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9763 "DADDp $dst,ST" %} 9764 ins_encode %{ 9765 __ fld_d($constantaddress($con)); 9766 __ faddp($dst$$reg); 9767 %} 9768 ins_pipe(fpu_reg_mem); 9769 %} 9770 9771 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9772 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9773 match(Set dst (RoundDouble (AddD src con))); 9774 ins_cost(200); 9775 format %{ "FLD_D [$constantaddress]\t# load from 
constant table: double=$con\n\t" 9776 "DADD ST,$src\n\t" 9777 "FSTP_D $dst\t# D-round" %} 9778 ins_encode %{ 9779 __ fld_d($constantaddress($con)); 9780 __ fadd($src$$reg); 9781 __ fstp_d(Address(rsp, $dst$$disp)); 9782 %} 9783 ins_pipe(fpu_mem_reg_con); 9784 %} 9785 9786 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9787 predicate(UseSSE<=1); 9788 match(Set dst (MulD dst src)); 9789 format %{ "FLD $src\n\t" 9790 "DMULp $dst,ST" %} 9791 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9792 ins_cost(150); 9793 ins_encode( Push_Reg_DPR(src), 9794 OpcP, RegOpc(dst) ); 9795 ins_pipe( fpu_reg_reg ); 9796 %} 9797 9798 // Strict FP instruction biases argument before multiply then 9799 // biases result to avoid double rounding of subnormals. 9800 // 9801 // scale arg1 by multiplying arg1 by 2^(-15360) 9802 // load arg2 9803 // multiply scaled arg1 by arg2 9804 // rescale product by 2^(15360) 9805 // 9806 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9807 predicate( UseSSE<=1 && Compile::current()->has_method() ); 9808 match(Set dst (MulD dst src)); 9809 ins_cost(1); // Select this instruction for all FP double multiplies 9810 9811 format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t" 9812 "DMULp $dst,ST\n\t" 9813 "FLD $src\n\t" 9814 "DMULp $dst,ST\n\t" 9815 "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t" 9816 "DMULp $dst,ST\n\t" %} 9817 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9818 ins_encode( strictfp_bias1(dst), 9819 Push_Reg_DPR(src), 9820 OpcP, RegOpc(dst), 9821 strictfp_bias2(dst) ); 9822 ins_pipe( fpu_reg_reg ); 9823 %} 9824 9825 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9826 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9827 match(Set dst (MulD dst con)); 9828 ins_cost(200); 9829 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9830 "DMULp $dst,ST" %} 9831 ins_encode %{ 9832 __ fld_d($constantaddress($con)); 9833 __ fmulp($dst$$reg); 9834 %} 9835 
ins_pipe(fpu_reg_mem); 9836 %} 9837 9838 9839 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9840 predicate( UseSSE<=1 ); 9841 match(Set dst (MulD dst (LoadD src))); 9842 ins_cost(200); 9843 format %{ "FLD_D $src\n\t" 9844 "DMULp $dst,ST" %} 9845 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9846 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9847 OpcP, RegOpc(dst) ); 9848 ins_pipe( fpu_reg_mem ); 9849 %} 9850 9851 // 9852 // Cisc-alternate to reg-reg multiply 9853 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ 9854 predicate( UseSSE<=1 ); 9855 match(Set dst (MulD src (LoadD mem))); 9856 ins_cost(250); 9857 format %{ "FLD_D $mem\n\t" 9858 "DMUL ST,$src\n\t" 9859 "FSTP_D $dst" %} 9860 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9861 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9862 OpcReg_FPR(src), 9863 Pop_Reg_DPR(dst) ); 9864 ins_pipe( fpu_reg_reg_mem ); 9865 %} 9866 9867 9868 // MACRO3 -- addDPR a mulDPR 9869 // This instruction is a '2-address' instruction in that the result goes 9870 // back to src2. This eliminates a move from the macro; possibly the 9871 // register allocator will have to add it back (and maybe not). 
// Fused multiply-add: computes src2 = (src0 * src1) + src2 on the x87 stack.
// '2-address' in that the result goes back into src2 (see MACRO3 note above
// this block in the file).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  // Push src0 onto the FPU stack, multiply TOS by src1, then pop-add into src2.
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
// Fused multiply-subtract: computes src2 = (src0 * src1) - src2.
// Uses DSUBRp (reverse subtract-and-pop, DE E0+i) because the product is on
// the top of the x87 stack and src2 is the destination register.
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


// Plain (non-strictfp) x87 double divide: dst = dst / src.
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
9917 // 9918 // scale dividend by multiplying dividend by 2^(-15360) 9919 // load divisor 9920 // divide scaled dividend by divisor 9921 // rescale quotient by 2^(15360) 9922 // 9923 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9924 predicate (UseSSE<=1); 9925 match(Set dst (DivD dst src)); 9926 predicate( UseSSE<=1 && Compile::current()->has_method() ); 9927 ins_cost(01); 9928 9929 format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t" 9930 "DMULp $dst,ST\n\t" 9931 "FLD $src\n\t" 9932 "FDIVp $dst,ST\n\t" 9933 "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t" 9934 "DMULp $dst,ST\n\t" %} 9935 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9936 ins_encode( strictfp_bias1(dst), 9937 Push_Reg_DPR(src), 9938 OpcP, RegOpc(dst), 9939 strictfp_bias2(dst) ); 9940 ins_pipe( fpu_reg_reg ); 9941 %} 9942 9943 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 9944 predicate(UseSSE<=1); 9945 match(Set dst (ModD dst src)); 9946 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 9947 9948 format %{ "DMOD $dst,$src" %} 9949 ins_cost(250); 9950 ins_encode(Push_Reg_Mod_DPR(dst, src), 9951 emitModDPR(), 9952 Push_Result_Mod_DPR(src), 9953 Pop_Reg_DPR(dst)); 9954 ins_pipe( pipe_slow ); 9955 %} 9956 9957 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 9958 predicate(UseSSE>=2); 9959 match(Set dst (ModD src0 src1)); 9960 effect(KILL rax, KILL cr); 9961 9962 format %{ "SUB ESP,8\t # DMOD\n" 9963 "\tMOVSD [ESP+0],$src1\n" 9964 "\tFLD_D [ESP+0]\n" 9965 "\tMOVSD [ESP+0],$src0\n" 9966 "\tFLD_D [ESP+0]\n" 9967 "loop:\tFPREM\n" 9968 "\tFWAIT\n" 9969 "\tFNSTSW AX\n" 9970 "\tSAHF\n" 9971 "\tJP loop\n" 9972 "\tFSTP_D [ESP+0]\n" 9973 "\tMOVSD $dst,[ESP+0]\n" 9974 "\tADD ESP,8\n" 9975 "\tFSTP ST0\t # Restore FPU Stack" 9976 %} 9977 ins_cost(250); 9978 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9979 ins_pipe( pipe_slow ); 9980 %} 9981 9982 instruct atanDPR_reg(regDPR dst, 
regDPR src) %{ 9983 predicate (UseSSE<=1); 9984 match(Set dst(AtanD dst src)); 9985 format %{ "DATA $dst,$src" %} 9986 opcode(0xD9, 0xF3); 9987 ins_encode( Push_Reg_DPR(src), 9988 OpcP, OpcS, RegOpc(dst) ); 9989 ins_pipe( pipe_slow ); 9990 %} 9991 9992 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9993 predicate (UseSSE>=2); 9994 match(Set dst(AtanD dst src)); 9995 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9996 format %{ "DATA $dst,$src" %} 9997 opcode(0xD9, 0xF3); 9998 ins_encode( Push_SrcD(src), 9999 OpcP, OpcS, Push_ResultD(dst) ); 10000 ins_pipe( pipe_slow ); 10001 %} 10002 10003 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 10004 predicate (UseSSE<=1); 10005 match(Set dst (SqrtD src)); 10006 format %{ "DSQRT $dst,$src" %} 10007 opcode(0xFA, 0xD9); 10008 ins_encode( Push_Reg_DPR(src), 10009 OpcS, OpcP, Pop_Reg_DPR(dst) ); 10010 ins_pipe( pipe_slow ); 10011 %} 10012 10013 //-------------Float Instructions------------------------------- 10014 // Float Math 10015 10016 // Code for float compare: 10017 // fcompp(); 10018 // fwait(); fnstsw_ax(); 10019 // sahf(); 10020 // movl(dst, unordered_result); 10021 // jcc(Assembler::parity, exit); 10022 // movl(dst, less_result); 10023 // jcc(Assembler::below, exit); 10024 // movl(dst, equal_result); 10025 // jcc(Assembler::equal, exit); 10026 // movl(dst, greater_result); 10027 // exit: 10028 10029 // P6 version of float compare, sets condition codes in EFLAGS 10030 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10031 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10032 match(Set cr (CmpF src1 src2)); 10033 effect(KILL rax); 10034 ins_cost(150); 10035 format %{ "FLD $src1\n\t" 10036 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10037 "JNP exit\n\t" 10038 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10039 "SAHF\n" 10040 "exit:\tNOP // avoid branch to branch" %} 10041 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10042 ins_encode( Push_Reg_DPR(src1), 10043 
OpcP, RegOpc(src2), 10044 cmpF_P6_fixup ); 10045 ins_pipe( pipe_slow ); 10046 %} 10047 10048 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 10049 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10050 match(Set cr (CmpF src1 src2)); 10051 ins_cost(100); 10052 format %{ "FLD $src1\n\t" 10053 "FUCOMIP ST,$src2 // P6 instruction" %} 10054 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10055 ins_encode( Push_Reg_DPR(src1), 10056 OpcP, RegOpc(src2)); 10057 ins_pipe( pipe_slow ); 10058 %} 10059 10060 10061 // Compare & branch 10062 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10063 predicate(UseSSE == 0); 10064 match(Set cr (CmpF src1 src2)); 10065 effect(KILL rax); 10066 ins_cost(200); 10067 format %{ "FLD $src1\n\t" 10068 "FCOMp $src2\n\t" 10069 "FNSTSW AX\n\t" 10070 "TEST AX,0x400\n\t" 10071 "JZ,s flags\n\t" 10072 "MOV AH,1\t# unordered treat as LT\n" 10073 "flags:\tSAHF" %} 10074 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10075 ins_encode( Push_Reg_DPR(src1), 10076 OpcP, RegOpc(src2), 10077 fpu_flags); 10078 ins_pipe( pipe_slow ); 10079 %} 10080 10081 // Compare vs zero into -1,0,1 10082 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10083 predicate(UseSSE == 0); 10084 match(Set dst (CmpF3 src1 zero)); 10085 effect(KILL cr, KILL rax); 10086 ins_cost(280); 10087 format %{ "FTSTF $dst,$src1" %} 10088 opcode(0xE4, 0xD9); 10089 ins_encode( Push_Reg_DPR(src1), 10090 OpcS, OpcP, PopFPU, 10091 CmpF_Result(dst)); 10092 ins_pipe( pipe_slow ); 10093 %} 10094 10095 // Compare into -1,0,1 10096 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10097 predicate(UseSSE == 0); 10098 match(Set dst (CmpF3 src1 src2)); 10099 effect(KILL cr, KILL rax); 10100 ins_cost(300); 10101 format %{ "FCMPF $dst,$src1,$src2" %} 10102 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10103 ins_encode( Push_Reg_DPR(src1), 10104 OpcP, RegOpc(src2), 10105 CmpF_Result(dst)); 10106 
ins_pipe( pipe_slow ); 10107 %} 10108 10109 // float compare and set condition codes in EFLAGS by XMM regs 10110 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10111 predicate(UseSSE>=1); 10112 match(Set cr (CmpF src1 src2)); 10113 ins_cost(145); 10114 format %{ "UCOMISS $src1,$src2\n\t" 10115 "JNP,s exit\n\t" 10116 "PUSHF\t# saw NaN, set CF\n\t" 10117 "AND [rsp], #0xffffff2b\n\t" 10118 "POPF\n" 10119 "exit:" %} 10120 ins_encode %{ 10121 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10122 emit_cmpfp_fixup(_masm); 10123 %} 10124 ins_pipe( pipe_slow ); 10125 %} 10126 10127 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10128 predicate(UseSSE>=1); 10129 match(Set cr (CmpF src1 src2)); 10130 ins_cost(100); 10131 format %{ "UCOMISS $src1,$src2" %} 10132 ins_encode %{ 10133 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10134 %} 10135 ins_pipe( pipe_slow ); 10136 %} 10137 10138 // float compare and set condition codes in EFLAGS by XMM regs 10139 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10140 predicate(UseSSE>=1); 10141 match(Set cr (CmpF src1 (LoadF src2))); 10142 ins_cost(165); 10143 format %{ "UCOMISS $src1,$src2\n\t" 10144 "JNP,s exit\n\t" 10145 "PUSHF\t# saw NaN, set CF\n\t" 10146 "AND [rsp], #0xffffff2b\n\t" 10147 "POPF\n" 10148 "exit:" %} 10149 ins_encode %{ 10150 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10151 emit_cmpfp_fixup(_masm); 10152 %} 10153 ins_pipe( pipe_slow ); 10154 %} 10155 10156 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10157 predicate(UseSSE>=1); 10158 match(Set cr (CmpF src1 (LoadF src2))); 10159 ins_cost(100); 10160 format %{ "UCOMISS $src1,$src2" %} 10161 ins_encode %{ 10162 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10163 %} 10164 ins_pipe( pipe_slow ); 10165 %} 10166 10167 // Compare into -1,0,1 in XMM 10168 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10169 predicate(UseSSE>=1); 10170 match(Set dst (CmpF3 src1 src2)); 10171 
effect(KILL cr); 10172 ins_cost(255); 10173 format %{ "UCOMISS $src1, $src2\n\t" 10174 "MOV $dst, #-1\n\t" 10175 "JP,s done\n\t" 10176 "JB,s done\n\t" 10177 "SETNE $dst\n\t" 10178 "MOVZB $dst, $dst\n" 10179 "done:" %} 10180 ins_encode %{ 10181 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10182 emit_cmpfp3(_masm, $dst$$Register); 10183 %} 10184 ins_pipe( pipe_slow ); 10185 %} 10186 10187 // Compare into -1,0,1 in XMM and memory 10188 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10189 predicate(UseSSE>=1); 10190 match(Set dst (CmpF3 src1 (LoadF src2))); 10191 effect(KILL cr); 10192 ins_cost(275); 10193 format %{ "UCOMISS $src1, $src2\n\t" 10194 "MOV $dst, #-1\n\t" 10195 "JP,s done\n\t" 10196 "JB,s done\n\t" 10197 "SETNE $dst\n\t" 10198 "MOVZB $dst, $dst\n" 10199 "done:" %} 10200 ins_encode %{ 10201 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10202 emit_cmpfp3(_masm, $dst$$Register); 10203 %} 10204 ins_pipe( pipe_slow ); 10205 %} 10206 10207 // Spill to obtain 24-bit precision 10208 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10209 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10210 match(Set dst (SubF src1 src2)); 10211 10212 format %{ "FSUB $dst,$src1 - $src2" %} 10213 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10214 ins_encode( Push_Reg_FPR(src1), 10215 OpcReg_FPR(src2), 10216 Pop_Mem_FPR(dst) ); 10217 ins_pipe( fpu_mem_reg_reg ); 10218 %} 10219 // 10220 // This instruction does not round to 24-bits 10221 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10222 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10223 match(Set dst (SubF dst src)); 10224 10225 format %{ "FSUB $dst,$src" %} 10226 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10227 ins_encode( Push_Reg_FPR(src), 10228 OpcP, RegOpc(dst) ); 10229 ins_pipe( fpu_reg_reg ); 10230 %} 10231 10232 // Spill to obtain 24-bit precision 10233 instruct addFPR24_reg(stackSlotF dst, regFPR src1, 
regFPR src2) %{ 10234 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10235 match(Set dst (AddF src1 src2)); 10236 10237 format %{ "FADD $dst,$src1,$src2" %} 10238 opcode(0xD8, 0x0); /* D8 C0+i */ 10239 ins_encode( Push_Reg_FPR(src2), 10240 OpcReg_FPR(src1), 10241 Pop_Mem_FPR(dst) ); 10242 ins_pipe( fpu_mem_reg_reg ); 10243 %} 10244 // 10245 // This instruction does not round to 24-bits 10246 instruct addFPR_reg(regFPR dst, regFPR src) %{ 10247 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10248 match(Set dst (AddF dst src)); 10249 10250 format %{ "FLD $src\n\t" 10251 "FADDp $dst,ST" %} 10252 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10253 ins_encode( Push_Reg_FPR(src), 10254 OpcP, RegOpc(dst) ); 10255 ins_pipe( fpu_reg_reg ); 10256 %} 10257 10258 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10259 predicate(UseSSE==0); 10260 match(Set dst (AbsF src)); 10261 ins_cost(100); 10262 format %{ "FABS" %} 10263 opcode(0xE1, 0xD9); 10264 ins_encode( OpcS, OpcP ); 10265 ins_pipe( fpu_reg_reg ); 10266 %} 10267 10268 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10269 predicate(UseSSE==0); 10270 match(Set dst (NegF src)); 10271 ins_cost(100); 10272 format %{ "FCHS" %} 10273 opcode(0xE0, 0xD9); 10274 ins_encode( OpcS, OpcP ); 10275 ins_pipe( fpu_reg_reg ); 10276 %} 10277 10278 // Cisc-alternate to addFPR_reg 10279 // Spill to obtain 24-bit precision 10280 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10281 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10282 match(Set dst (AddF src1 (LoadF src2))); 10283 10284 format %{ "FLD $src2\n\t" 10285 "FADD ST,$src1\n\t" 10286 "FSTP_S $dst" %} 10287 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10288 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10289 OpcReg_FPR(src1), 10290 Pop_Mem_FPR(dst) ); 10291 ins_pipe( fpu_mem_reg_mem ); 10292 %} 10293 // 10294 // Cisc-alternate to addFPR_reg 10295 // This instruction does not round to 
24-bits 10296 instruct addFPR_reg_mem(regFPR dst, memory src) %{ 10297 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10298 match(Set dst (AddF dst (LoadF src))); 10299 10300 format %{ "FADD $dst,$src" %} 10301 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10302 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10303 OpcP, RegOpc(dst) ); 10304 ins_pipe( fpu_reg_mem ); 10305 %} 10306 10307 // // Following two instructions for _222_mpegaudio 10308 // Spill to obtain 24-bit precision 10309 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 10310 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10311 match(Set dst (AddF src1 src2)); 10312 10313 format %{ "FADD $dst,$src1,$src2" %} 10314 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10315 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10316 OpcReg_FPR(src2), 10317 Pop_Mem_FPR(dst) ); 10318 ins_pipe( fpu_mem_reg_mem ); 10319 %} 10320 10321 // Cisc-spill variant 10322 // Spill to obtain 24-bit precision 10323 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10324 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10325 match(Set dst (AddF src1 (LoadF src2))); 10326 10327 format %{ "FADD $dst,$src1,$src2 cisc" %} 10328 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10329 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10330 set_instruction_start, 10331 OpcP, RMopc_Mem(secondary,src1), 10332 Pop_Mem_FPR(dst) ); 10333 ins_pipe( fpu_mem_mem_mem ); 10334 %} 10335 10336 // Spill to obtain 24-bit precision 10337 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10338 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10339 match(Set dst (AddF src1 src2)); 10340 10341 format %{ "FADD $dst,$src1,$src2" %} 10342 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10343 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10344 
set_instruction_start, 10345 OpcP, RMopc_Mem(secondary,src1), 10346 Pop_Mem_FPR(dst) ); 10347 ins_pipe( fpu_mem_mem_mem ); 10348 %} 10349 10350 10351 // Spill to obtain 24-bit precision 10352 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10353 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10354 match(Set dst (AddF src con)); 10355 format %{ "FLD $src\n\t" 10356 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10357 "FSTP_S $dst" %} 10358 ins_encode %{ 10359 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10360 __ fadd_s($constantaddress($con)); 10361 __ fstp_s(Address(rsp, $dst$$disp)); 10362 %} 10363 ins_pipe(fpu_mem_reg_con); 10364 %} 10365 // 10366 // This instruction does not round to 24-bits 10367 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10368 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10369 match(Set dst (AddF src con)); 10370 format %{ "FLD $src\n\t" 10371 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10372 "FSTP $dst" %} 10373 ins_encode %{ 10374 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10375 __ fadd_s($constantaddress($con)); 10376 __ fstp_d($dst$$reg); 10377 %} 10378 ins_pipe(fpu_reg_reg_con); 10379 %} 10380 10381 // Spill to obtain 24-bit precision 10382 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10383 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10384 match(Set dst (MulF src1 src2)); 10385 10386 format %{ "FLD $src1\n\t" 10387 "FMUL $src2\n\t" 10388 "FSTP_S $dst" %} 10389 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10390 ins_encode( Push_Reg_FPR(src1), 10391 OpcReg_FPR(src2), 10392 Pop_Mem_FPR(dst) ); 10393 ins_pipe( fpu_mem_reg_reg ); 10394 %} 10395 // 10396 // This instruction does not round to 24-bits 10397 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10398 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10399 match(Set dst 
(MulF src1 src2)); 10400 10401 format %{ "FLD $src1\n\t" 10402 "FMUL $src2\n\t" 10403 "FSTP_S $dst" %} 10404 opcode(0xD8, 0x1); /* D8 C8+i */ 10405 ins_encode( Push_Reg_FPR(src2), 10406 OpcReg_FPR(src1), 10407 Pop_Reg_FPR(dst) ); 10408 ins_pipe( fpu_reg_reg_reg ); 10409 %} 10410 10411 10412 // Spill to obtain 24-bit precision 10413 // Cisc-alternate to reg-reg multiply 10414 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10415 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10416 match(Set dst (MulF src1 (LoadF src2))); 10417 10418 format %{ "FLD_S $src2\n\t" 10419 "FMUL $src1\n\t" 10420 "FSTP_S $dst" %} 10421 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10422 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10423 OpcReg_FPR(src1), 10424 Pop_Mem_FPR(dst) ); 10425 ins_pipe( fpu_mem_reg_mem ); 10426 %} 10427 // 10428 // This instruction does not round to 24-bits 10429 // Cisc-alternate to reg-reg multiply 10430 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10431 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10432 match(Set dst (MulF src1 (LoadF src2))); 10433 10434 format %{ "FMUL $dst,$src1,$src2" %} 10435 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10436 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10437 OpcReg_FPR(src1), 10438 Pop_Reg_FPR(dst) ); 10439 ins_pipe( fpu_reg_reg_mem ); 10440 %} 10441 10442 // Spill to obtain 24-bit precision 10443 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10444 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10445 match(Set dst (MulF src1 src2)); 10446 10447 format %{ "FMUL $dst,$src1,$src2" %} 10448 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10449 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10450 set_instruction_start, 10451 OpcP, RMopc_Mem(secondary,src1), 10452 Pop_Mem_FPR(dst) ); 10453 ins_pipe( fpu_mem_mem_mem ); 10454 %} 10455 10456 // 
Spill to obtain 24-bit precision 10457 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10458 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10459 match(Set dst (MulF src con)); 10460 10461 format %{ "FLD $src\n\t" 10462 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10463 "FSTP_S $dst" %} 10464 ins_encode %{ 10465 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10466 __ fmul_s($constantaddress($con)); 10467 __ fstp_s(Address(rsp, $dst$$disp)); 10468 %} 10469 ins_pipe(fpu_mem_reg_con); 10470 %} 10471 // 10472 // This instruction does not round to 24-bits 10473 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10474 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10475 match(Set dst (MulF src con)); 10476 10477 format %{ "FLD $src\n\t" 10478 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10479 "FSTP $dst" %} 10480 ins_encode %{ 10481 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10482 __ fmul_s($constantaddress($con)); 10483 __ fstp_d($dst$$reg); 10484 %} 10485 ins_pipe(fpu_reg_reg_con); 10486 %} 10487 10488 10489 // 10490 // MACRO1 -- subsume unshared load into mulFPR 10491 // This instruction does not round to 24-bits 10492 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 10493 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10494 match(Set dst (MulF (LoadF mem1) src)); 10495 10496 format %{ "FLD $mem1 ===MACRO1===\n\t" 10497 "FMUL ST,$src\n\t" 10498 "FSTP $dst" %} 10499 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10500 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10501 OpcReg_FPR(src), 10502 Pop_Reg_FPR(dst) ); 10503 ins_pipe( fpu_reg_reg_mem ); 10504 %} 10505 // 10506 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load 10507 // This instruction does not round to 24-bits 10508 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 10509 
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10510 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10511 ins_cost(95); 10512 10513 format %{ "FLD $mem1 ===MACRO2===\n\t" 10514 "FMUL ST,$src1 subsume mulFPR left load\n\t" 10515 "FADD ST,$src2\n\t" 10516 "FSTP $dst" %} 10517 opcode(0xD9); /* LoadF D9 /0 */ 10518 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10519 FMul_ST_reg(src1), 10520 FAdd_ST_reg(src2), 10521 Pop_Reg_FPR(dst) ); 10522 ins_pipe( fpu_reg_mem_reg_reg ); 10523 %} 10524 10525 // MACRO3 -- addFPR a mulFPR 10526 // This instruction does not round to 24-bits. It is a '2-address' 10527 // instruction in that the result goes back to src2. This eliminates 10528 // a move from the macro; possibly the register allocator will have 10529 // to add it back (and maybe not). 10530 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ 10531 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10532 match(Set src2 (AddF (MulF src0 src1) src2)); 10533 10534 format %{ "FLD $src0 ===MACRO3===\n\t" 10535 "FMUL ST,$src1\n\t" 10536 "FADDP $src2,ST" %} 10537 opcode(0xD9); /* LoadF D9 /0 */ 10538 ins_encode( Push_Reg_FPR(src0), 10539 FMul_ST_reg(src1), 10540 FAddP_reg_ST(src2) ); 10541 ins_pipe( fpu_reg_reg_reg ); 10542 %} 10543 10544 // MACRO4 -- divFPR subFPR 10545 // This instruction does not round to 24-bits 10546 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ 10547 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10548 match(Set dst (DivF (SubF src2 src1) src3)); 10549 10550 format %{ "FLD $src2 ===MACRO4===\n\t" 10551 "FSUB ST,$src1\n\t" 10552 "FDIV ST,$src3\n\t" 10553 "FSTP $dst" %} 10554 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10555 ins_encode( Push_Reg_FPR(src2), 10556 subFPR_divFPR_encode(src1,src3), 10557 Pop_Reg_FPR(dst) ); 10558 ins_pipe( fpu_reg_reg_reg_reg ); 10559 %} 10560 10561 // Spill to obtain 24-bit precision 10562 instruct divFPR24_reg(stackSlotF 
dst, regFPR src1, regFPR src2) %{ 10563 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10564 match(Set dst (DivF src1 src2)); 10565 10566 format %{ "FDIV $dst,$src1,$src2" %} 10567 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ 10568 ins_encode( Push_Reg_FPR(src1), 10569 OpcReg_FPR(src2), 10570 Pop_Mem_FPR(dst) ); 10571 ins_pipe( fpu_mem_reg_reg ); 10572 %} 10573 // 10574 // This instruction does not round to 24-bits 10575 instruct divFPR_reg(regFPR dst, regFPR src) %{ 10576 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10577 match(Set dst (DivF dst src)); 10578 10579 format %{ "FDIV $dst,$src" %} 10580 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10581 ins_encode( Push_Reg_FPR(src), 10582 OpcP, RegOpc(dst) ); 10583 ins_pipe( fpu_reg_reg ); 10584 %} 10585 10586 10587 // Spill to obtain 24-bit precision 10588 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10589 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10590 match(Set dst (ModF src1 src2)); 10591 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10592 10593 format %{ "FMOD $dst,$src1,$src2" %} 10594 ins_encode( Push_Reg_Mod_DPR(src1, src2), 10595 emitModDPR(), 10596 Push_Result_Mod_DPR(src2), 10597 Pop_Mem_FPR(dst)); 10598 ins_pipe( pipe_slow ); 10599 %} 10600 // 10601 // This instruction does not round to 24-bits 10602 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ 10603 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10604 match(Set dst (ModF dst src)); 10605 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10606 10607 format %{ "FMOD $dst,$src" %} 10608 ins_encode(Push_Reg_Mod_DPR(dst, src), 10609 emitModDPR(), 10610 Push_Result_Mod_DPR(src), 10611 Pop_Reg_FPR(dst)); 10612 ins_pipe( pipe_slow ); 10613 %} 10614 10615 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ 10616 predicate(UseSSE>=1); 10617 
match(Set dst (ModF src0 src1)); 10618 effect(KILL rax, KILL cr); 10619 format %{ "SUB ESP,4\t # FMOD\n" 10620 "\tMOVSS [ESP+0],$src1\n" 10621 "\tFLD_S [ESP+0]\n" 10622 "\tMOVSS [ESP+0],$src0\n" 10623 "\tFLD_S [ESP+0]\n" 10624 "loop:\tFPREM\n" 10625 "\tFWAIT\n" 10626 "\tFNSTSW AX\n" 10627 "\tSAHF\n" 10628 "\tJP loop\n" 10629 "\tFSTP_S [ESP+0]\n" 10630 "\tMOVSS $dst,[ESP+0]\n" 10631 "\tADD ESP,4\n" 10632 "\tFSTP ST0\t # Restore FPU Stack" 10633 %} 10634 ins_cost(250); 10635 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); 10636 ins_pipe( pipe_slow ); 10637 %} 10638 10639 10640 //----------Arithmetic Conversion Instructions--------------------------------- 10641 // The conversions operations are all Alpha sorted. Please keep it that way! 10642 10643 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ 10644 predicate(UseSSE==0); 10645 match(Set dst (RoundFloat src)); 10646 ins_cost(125); 10647 format %{ "FST_S $dst,$src\t# F-round" %} 10648 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 10649 ins_pipe( fpu_mem_reg ); 10650 %} 10651 10652 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ 10653 predicate(UseSSE<=1); 10654 match(Set dst (RoundDouble src)); 10655 ins_cost(125); 10656 format %{ "FST_D $dst,$src\t# D-round" %} 10657 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 10658 ins_pipe( fpu_mem_reg ); 10659 %} 10660 10661 // Force rounding to 24-bit precision and 6-bit exponent 10662 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{ 10663 predicate(UseSSE==0); 10664 match(Set dst (ConvD2F src)); 10665 format %{ "FST_S $dst,$src\t# F-round" %} 10666 expand %{ 10667 roundFloat_mem_reg(dst,src); 10668 %} 10669 %} 10670 10671 // Force rounding to 24-bit precision and 6-bit exponent 10672 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{ 10673 predicate(UseSSE==1); 10674 match(Set dst (ConvD2F src)); 10675 effect( KILL cr ); 10676 format %{ "SUB ESP,4\n\t" 10677 "FST_S [ESP],$src\t# F-round\n\t" 10678 "MOVSS 
$dst,[ESP]\n\t" 10679 "ADD ESP,4" %} 10680 ins_encode %{ 10681 __ subptr(rsp, 4); 10682 if ($src$$reg != FPR1L_enc) { 10683 __ fld_s($src$$reg-1); 10684 __ fstp_s(Address(rsp, 0)); 10685 } else { 10686 __ fst_s(Address(rsp, 0)); 10687 } 10688 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 10689 __ addptr(rsp, 4); 10690 %} 10691 ins_pipe( pipe_slow ); 10692 %} 10693 10694 // Force rounding double precision to single precision 10695 instruct convD2F_reg(regF dst, regD src) %{ 10696 predicate(UseSSE>=2); 10697 match(Set dst (ConvD2F src)); 10698 format %{ "CVTSD2SS $dst,$src\t# F-round" %} 10699 ins_encode %{ 10700 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); 10701 %} 10702 ins_pipe( pipe_slow ); 10703 %} 10704 10705 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ 10706 predicate(UseSSE==0); 10707 match(Set dst (ConvF2D src)); 10708 format %{ "FST_S $dst,$src\t# D-round" %} 10709 ins_encode( Pop_Reg_Reg_DPR(dst, src)); 10710 ins_pipe( fpu_reg_reg ); 10711 %} 10712 10713 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ 10714 predicate(UseSSE==1); 10715 match(Set dst (ConvF2D src)); 10716 format %{ "FST_D $dst,$src\t# D-round" %} 10717 expand %{ 10718 roundDouble_mem_reg(dst,src); 10719 %} 10720 %} 10721 10722 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ 10723 predicate(UseSSE==1); 10724 match(Set dst (ConvF2D src)); 10725 effect( KILL cr ); 10726 format %{ "SUB ESP,4\n\t" 10727 "MOVSS [ESP] $src\n\t" 10728 "FLD_S [ESP]\n\t" 10729 "ADD ESP,4\n\t" 10730 "FSTP $dst\t# D-round" %} 10731 ins_encode %{ 10732 __ subptr(rsp, 4); 10733 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10734 __ fld_s(Address(rsp, 0)); 10735 __ addptr(rsp, 4); 10736 __ fstp_d($dst$$reg); 10737 %} 10738 ins_pipe( pipe_slow ); 10739 %} 10740 10741 instruct convF2D_reg(regD dst, regF src) %{ 10742 predicate(UseSSE>=2); 10743 match(Set dst (ConvF2D src)); 10744 format %{ "CVTSS2SD $dst,$src\t# D-round" %} 10745 ins_encode %{ 10746 __ cvtss2sd ($dst$$XMMRegister, 
$src$$XMMRegister); 10747 %} 10748 ins_pipe( pipe_slow ); 10749 %} 10750 10751 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10752 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{ 10753 predicate(UseSSE<=1); 10754 match(Set dst (ConvD2I src)); 10755 effect( KILL tmp, KILL cr ); 10756 format %{ "FLD $src\t# Convert double to int \n\t" 10757 "FLDCW trunc mode\n\t" 10758 "SUB ESP,4\n\t" 10759 "FISTp [ESP + #0]\n\t" 10760 "FLDCW std/24-bit mode\n\t" 10761 "POP EAX\n\t" 10762 "CMP EAX,0x80000000\n\t" 10763 "JNE,s fast\n\t" 10764 "FLD_D $src\n\t" 10765 "CALL d2i_wrapper\n" 10766 "fast:" %} 10767 ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); 10768 ins_pipe( pipe_slow ); 10769 %} 10770 10771 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10772 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ 10773 predicate(UseSSE>=2); 10774 match(Set dst (ConvD2I src)); 10775 effect( KILL tmp, KILL cr ); 10776 format %{ "CVTTSD2SI $dst, $src\n\t" 10777 "CMP $dst,0x80000000\n\t" 10778 "JNE,s fast\n\t" 10779 "SUB ESP, 8\n\t" 10780 "MOVSD [ESP], $src\n\t" 10781 "FLD_D [ESP]\n\t" 10782 "ADD ESP, 8\n\t" 10783 "CALL d2i_wrapper\n" 10784 "fast:" %} 10785 ins_encode %{ 10786 Label fast; 10787 __ cvttsd2sil($dst$$Register, $src$$XMMRegister); 10788 __ cmpl($dst$$Register, 0x80000000); 10789 __ jccb(Assembler::notEqual, fast); 10790 __ subptr(rsp, 8); 10791 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10792 __ fld_d(Address(rsp, 0)); 10793 __ addptr(rsp, 8); 10794 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper()))); 10795 __ post_call_nop(); 10796 __ bind(fast); 10797 %} 10798 ins_pipe( pipe_slow ); 10799 %} 10800 10801 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ 10802 predicate(UseSSE<=1); 10803 match(Set dst (ConvD2L src)); 10804 effect( KILL cr ); 10805 format %{ "FLD $src\t# Convert double to 
long\n\t" 10806 "FLDCW trunc mode\n\t" 10807 "SUB ESP,8\n\t" 10808 "FISTp [ESP + #0]\n\t" 10809 "FLDCW std/24-bit mode\n\t" 10810 "POP EAX\n\t" 10811 "POP EDX\n\t" 10812 "CMP EDX,0x80000000\n\t" 10813 "JNE,s fast\n\t" 10814 "TEST EAX,EAX\n\t" 10815 "JNE,s fast\n\t" 10816 "FLD $src\n\t" 10817 "CALL d2l_wrapper\n" 10818 "fast:" %} 10819 ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); 10820 ins_pipe( pipe_slow ); 10821 %} 10822 10823 // XMM lacks a float/double->long conversion, so use the old FPU stack. 10824 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ 10825 predicate (UseSSE>=2); 10826 match(Set dst (ConvD2L src)); 10827 effect( KILL cr ); 10828 format %{ "SUB ESP,8\t# Convert double to long\n\t" 10829 "MOVSD [ESP],$src\n\t" 10830 "FLD_D [ESP]\n\t" 10831 "FLDCW trunc mode\n\t" 10832 "FISTp [ESP + #0]\n\t" 10833 "FLDCW std/24-bit mode\n\t" 10834 "POP EAX\n\t" 10835 "POP EDX\n\t" 10836 "CMP EDX,0x80000000\n\t" 10837 "JNE,s fast\n\t" 10838 "TEST EAX,EAX\n\t" 10839 "JNE,s fast\n\t" 10840 "SUB ESP,8\n\t" 10841 "MOVSD [ESP],$src\n\t" 10842 "FLD_D [ESP]\n\t" 10843 "ADD ESP,8\n\t" 10844 "CALL d2l_wrapper\n" 10845 "fast:" %} 10846 ins_encode %{ 10847 Label fast; 10848 __ subptr(rsp, 8); 10849 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10850 __ fld_d(Address(rsp, 0)); 10851 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc())); 10852 __ fistp_d(Address(rsp, 0)); 10853 // Restore the rounding mode, mask the exception 10854 if (Compile::current()->in_24_bit_fp_mode()) { 10855 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 10856 } else { 10857 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 10858 } 10859 // Load the converted long, adjust CPU stack 10860 __ pop(rax); 10861 __ pop(rdx); 10862 __ cmpl(rdx, 0x80000000); 10863 __ jccb(Assembler::notEqual, fast); 10864 __ testl(rax, rax); 10865 __ jccb(Assembler::notEqual, fast); 10866 __ subptr(rsp, 8); 10867 __ movdbl(Address(rsp, 
0), $src$$XMMRegister); 10868 __ fld_d(Address(rsp, 0)); 10869 __ addptr(rsp, 8); 10870 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper()))); 10871 __ post_call_nop(); 10872 __ bind(fast); 10873 %} 10874 ins_pipe( pipe_slow ); 10875 %} 10876 10877 // Convert a double to an int. Java semantics require we do complex 10878 // manglations in the corner cases. So we set the rounding mode to 10879 // 'zero', store the darned double down as an int, and reset the 10880 // rounding mode to 'nearest'. The hardware stores a flag value down 10881 // if we would overflow or converted a NAN; we check for this and 10882 // and go the slow path if needed. 10883 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ 10884 predicate(UseSSE==0); 10885 match(Set dst (ConvF2I src)); 10886 effect( KILL tmp, KILL cr ); 10887 format %{ "FLD $src\t# Convert float to int \n\t" 10888 "FLDCW trunc mode\n\t" 10889 "SUB ESP,4\n\t" 10890 "FISTp [ESP + #0]\n\t" 10891 "FLDCW std/24-bit mode\n\t" 10892 "POP EAX\n\t" 10893 "CMP EAX,0x80000000\n\t" 10894 "JNE,s fast\n\t" 10895 "FLD $src\n\t" 10896 "CALL d2i_wrapper\n" 10897 "fast:" %} 10898 // DPR2I_encoding works for FPR2I 10899 ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); 10900 ins_pipe( pipe_slow ); 10901 %} 10902 10903 // Convert a float in xmm to an int reg. 
// Convert a float held in an XMM register to an int (ConvF2I) when SSE is
// available.  CVTTSS2SI truncates toward zero; on overflow or NaN the
// hardware deposits the "integer indefinite" value 0x80000000, so that
// sentinel is checked and the slow stub (d2i_wrapper) is called to produce
// the corner-case result.  Result lands in EAX; EDX and EFLAGS are killed.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );  // tmp (EDX) and EFLAGS clobbered
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    // 0x80000000 is the only result that can mean "conversion failed".
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: hand the value to the stub on the x87 stack via memory.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert an x87 float to a long (ConvF2L) when SSE is disabled.  Uses
// FIST in truncating rounding mode; the long result is popped into
// EDX:EAX.  EDX==0x80000000 with EAX==0 is the overflow/NaN sentinel,
// which routes to the d2l_wrapper stub for Java corner-case semantics.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
// Convert an XMM float to a long (ConvF2L) when SSE is enabled.  x86-32
// SSE has no float->int64 instruction, so the value is spilled to the
// stack and pushed through the x87 unit with FIST in truncating mode.
// Result pops into EDX:EAX; the sentinel EDX==0x80000000 && EAX==0
// (overflow/NaN) diverts to the d2l_wrapper stub.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    // Switch the x87 control word to truncating rounding for FISTP.
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: re-spill the float and call the stub for corner cases.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert an int (spilled to a stack slot) to an x87 double via FILD.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Int to XMM double via CVTSI2SD (when the XMM-integer path is disabled).
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Cisc variant: convert an int directly from memory to an XMM double.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Int to double staying entirely in the XMM domain: MOVD then CVTDQ2PD
// (avoids a GPR->FPU round trip when UseXmmI2D is enabled).
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Cisc variant: FILD straight from memory into an x87 double.
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
11066 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{ 11067 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); 11068 match(Set dst (ConvI2F src)); 11069 format %{ "FILD $src\n\t" 11070 "FSTP $dst" %} 11071 11072 opcode(0xDB, 0x0); /* DB /0 */ 11073 ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); 11074 ins_pipe( fpu_reg_mem ); 11075 %} 11076 11077 // In 24-bit mode, force exponent rounding by storing back out 11078 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{ 11079 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11080 match(Set dst (ConvI2F src)); 11081 ins_cost(200); 11082 format %{ "FILD $src\n\t" 11083 "FSTP_S $dst" %} 11084 opcode(0xDB, 0x0); /* DB /0 */ 11085 ins_encode( Push_Mem_I(src), 11086 Pop_Mem_FPR(dst)); 11087 ins_pipe( fpu_mem_mem ); 11088 %} 11089 11090 // In 24-bit mode, force exponent rounding by storing back out 11091 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{ 11092 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11093 match(Set dst (ConvI2F (LoadI mem))); 11094 ins_cost(200); 11095 format %{ "FILD $mem\n\t" 11096 "FSTP_S $dst" %} 11097 opcode(0xDB); /* DB /0 */ 11098 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11099 Pop_Mem_FPR(dst)); 11100 ins_pipe( fpu_mem_mem ); 11101 %} 11102 11103 // This instruction does not round to 24-bits 11104 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{ 11105 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11106 match(Set dst (ConvI2F src)); 11107 format %{ "FILD $src\n\t" 11108 "FSTP $dst" %} 11109 opcode(0xDB, 0x0); /* DB /0 */ 11110 ins_encode( Push_Mem_I(src), 11111 Pop_Reg_FPR(dst)); 11112 ins_pipe( fpu_reg_mem ); 11113 %} 11114 11115 // This instruction does not round to 24-bits 11116 instruct convI2FPR_mem(regFPR dst, memory mem) %{ 11117 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11118 match(Set dst (ConvI2F (LoadI 
mem))); 11119 format %{ "FILD $mem\n\t" 11120 "FSTP $dst" %} 11121 opcode(0xDB); /* DB /0 */ 11122 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11123 Pop_Reg_FPR(dst)); 11124 ins_pipe( fpu_reg_mem ); 11125 %} 11126 11127 // Convert an int to a float in xmm; no rounding step needed. 11128 instruct convI2F_reg(regF dst, rRegI src) %{ 11129 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); 11130 match(Set dst (ConvI2F src)); 11131 format %{ "CVTSI2SS $dst, $src" %} 11132 ins_encode %{ 11133 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); 11134 %} 11135 ins_pipe( pipe_slow ); 11136 %} 11137 11138 instruct convXI2F_reg(regF dst, rRegI src) 11139 %{ 11140 predicate( UseSSE>=2 && UseXmmI2F ); 11141 match(Set dst (ConvI2F src)); 11142 11143 format %{ "MOVD $dst,$src\n\t" 11144 "CVTDQ2PS $dst,$dst\t# i2f" %} 11145 ins_encode %{ 11146 __ movdl($dst$$XMMRegister, $src$$Register); 11147 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); 11148 %} 11149 ins_pipe(pipe_slow); // XXX 11150 %} 11151 11152 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{ 11153 match(Set dst (ConvI2L src)); 11154 effect(KILL cr); 11155 ins_cost(375); 11156 format %{ "MOV $dst.lo,$src\n\t" 11157 "MOV $dst.hi,$src\n\t" 11158 "SAR $dst.hi,31" %} 11159 ins_encode(convert_int_long(dst,src)); 11160 ins_pipe( ialu_reg_reg_long ); 11161 %} 11162 11163 // Zero-extend convert int to long 11164 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{ 11165 match(Set dst (AndL (ConvI2L src) mask) ); 11166 effect( KILL flags ); 11167 ins_cost(250); 11168 format %{ "MOV $dst.lo,$src\n\t" 11169 "XOR $dst.hi,$dst.hi" %} 11170 opcode(0x33); // XOR 11171 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11172 ins_pipe( ialu_reg_reg_long ); 11173 %} 11174 11175 // Zero-extend long 11176 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{ 11177 match(Set dst (AndL src mask) ); 11178 effect( KILL flags ); 11179 ins_cost(250); 11180 format %{ "MOV 
$dst.lo,$src.lo\n\t" 11181 "XOR $dst.hi,$dst.hi\n\t" %} 11182 opcode(0x33); // XOR 11183 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11184 ins_pipe( ialu_reg_reg_long ); 11185 %} 11186 11187 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ 11188 predicate (UseSSE<=1); 11189 match(Set dst (ConvL2D src)); 11190 effect( KILL cr ); 11191 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11192 "PUSH $src.lo\n\t" 11193 "FILD ST,[ESP + #0]\n\t" 11194 "ADD ESP,8\n\t" 11195 "FSTP_D $dst\t# D-round" %} 11196 opcode(0xDF, 0x5); /* DF /5 */ 11197 ins_encode(convert_long_double(src), Pop_Mem_DPR(dst)); 11198 ins_pipe( pipe_slow ); 11199 %} 11200 11201 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{ 11202 predicate (UseSSE>=2); 11203 match(Set dst (ConvL2D src)); 11204 effect( KILL cr ); 11205 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11206 "PUSH $src.lo\n\t" 11207 "FILD_D [ESP]\n\t" 11208 "FSTP_D [ESP]\n\t" 11209 "MOVSD $dst,[ESP]\n\t" 11210 "ADD ESP,8" %} 11211 opcode(0xDF, 0x5); /* DF /5 */ 11212 ins_encode(convert_long_double2(src), Push_ResultD(dst)); 11213 ins_pipe( pipe_slow ); 11214 %} 11215 11216 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{ 11217 predicate (UseSSE>=1); 11218 match(Set dst (ConvL2F src)); 11219 effect( KILL cr ); 11220 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11221 "PUSH $src.lo\n\t" 11222 "FILD_D [ESP]\n\t" 11223 "FSTP_S [ESP]\n\t" 11224 "MOVSS $dst,[ESP]\n\t" 11225 "ADD ESP,8" %} 11226 opcode(0xDF, 0x5); /* DF /5 */ 11227 ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8)); 11228 ins_pipe( pipe_slow ); 11229 %} 11230 11231 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ 11232 match(Set dst (ConvL2F src)); 11233 effect( KILL cr ); 11234 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11235 "PUSH $src.lo\n\t" 11236 "FILD ST,[ESP + #0]\n\t" 11237 "ADD ESP,8\n\t" 11238 "FSTP_S $dst\t# F-round" %} 11239 opcode(0xDF, 
0x5); /* DF /5 */ 11240 ins_encode(convert_long_double(src), Pop_Mem_FPR(dst)); 11241 ins_pipe( pipe_slow ); 11242 %} 11243 11244 instruct convL2I_reg( rRegI dst, eRegL src ) %{ 11245 match(Set dst (ConvL2I src)); 11246 effect( DEF dst, USE src ); 11247 format %{ "MOV $dst,$src.lo" %} 11248 ins_encode(enc_CopyL_Lo(dst,src)); 11249 ins_pipe( ialu_reg_reg ); 11250 %} 11251 11252 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{ 11253 match(Set dst (MoveF2I src)); 11254 effect( DEF dst, USE src ); 11255 ins_cost(100); 11256 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %} 11257 ins_encode %{ 11258 __ movl($dst$$Register, Address(rsp, $src$$disp)); 11259 %} 11260 ins_pipe( ialu_reg_mem ); 11261 %} 11262 11263 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{ 11264 predicate(UseSSE==0); 11265 match(Set dst (MoveF2I src)); 11266 effect( DEF dst, USE src ); 11267 11268 ins_cost(125); 11269 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %} 11270 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 11271 ins_pipe( fpu_mem_reg ); 11272 %} 11273 11274 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{ 11275 predicate(UseSSE>=1); 11276 match(Set dst (MoveF2I src)); 11277 effect( DEF dst, USE src ); 11278 11279 ins_cost(95); 11280 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %} 11281 ins_encode %{ 11282 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); 11283 %} 11284 ins_pipe( pipe_slow ); 11285 %} 11286 11287 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{ 11288 predicate(UseSSE>=2); 11289 match(Set dst (MoveF2I src)); 11290 effect( DEF dst, USE src ); 11291 ins_cost(85); 11292 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} 11293 ins_encode %{ 11294 __ movdl($dst$$Register, $src$$XMMRegister); 11295 %} 11296 ins_pipe( pipe_slow ); 11297 %} 11298 11299 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ 11300 match(Set dst (MoveI2F src)); 11301 effect( DEF dst, USE src ); 11302 11303 ins_cost(100); 11304 format %{ "MOV $dst,$src\t# 
MoveI2F_reg_stack" %} 11305 ins_encode %{ 11306 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11307 %} 11308 ins_pipe( ialu_mem_reg ); 11309 %} 11310 11311 11312 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11313 predicate(UseSSE==0); 11314 match(Set dst (MoveI2F src)); 11315 effect(DEF dst, USE src); 11316 11317 ins_cost(125); 11318 format %{ "FLD_S $src\n\t" 11319 "FSTP $dst\t# MoveI2F_stack_reg" %} 11320 opcode(0xD9); /* D9 /0, FLD m32real */ 11321 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11322 Pop_Reg_FPR(dst) ); 11323 ins_pipe( fpu_reg_mem ); 11324 %} 11325 11326 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11327 predicate(UseSSE>=1); 11328 match(Set dst (MoveI2F src)); 11329 effect( DEF dst, USE src ); 11330 11331 ins_cost(95); 11332 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11333 ins_encode %{ 11334 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11335 %} 11336 ins_pipe( pipe_slow ); 11337 %} 11338 11339 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11340 predicate(UseSSE>=2); 11341 match(Set dst (MoveI2F src)); 11342 effect( DEF dst, USE src ); 11343 11344 ins_cost(85); 11345 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11346 ins_encode %{ 11347 __ movdl($dst$$XMMRegister, $src$$Register); 11348 %} 11349 ins_pipe( pipe_slow ); 11350 %} 11351 11352 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11353 match(Set dst (MoveD2L src)); 11354 effect(DEF dst, USE src); 11355 11356 ins_cost(250); 11357 format %{ "MOV $dst.lo,$src\n\t" 11358 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11359 opcode(0x8B, 0x8B); 11360 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); 11361 ins_pipe( ialu_mem_long_reg ); 11362 %} 11363 11364 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11365 predicate(UseSSE<=1); 11366 match(Set dst (MoveD2L src)); 11367 effect(DEF dst, USE src); 11368 11369 ins_cost(125); 11370 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11371 
ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11372 ins_pipe( fpu_mem_reg ); 11373 %} 11374 11375 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11376 predicate(UseSSE>=2); 11377 match(Set dst (MoveD2L src)); 11378 effect(DEF dst, USE src); 11379 ins_cost(95); 11380 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11381 ins_encode %{ 11382 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11383 %} 11384 ins_pipe( pipe_slow ); 11385 %} 11386 11387 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11388 predicate(UseSSE>=2); 11389 match(Set dst (MoveD2L src)); 11390 effect(DEF dst, USE src, TEMP tmp); 11391 ins_cost(85); 11392 format %{ "MOVD $dst.lo,$src\n\t" 11393 "PSHUFLW $tmp,$src,0x4E\n\t" 11394 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11395 ins_encode %{ 11396 __ movdl($dst$$Register, $src$$XMMRegister); 11397 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11398 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11399 %} 11400 ins_pipe( pipe_slow ); 11401 %} 11402 11403 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11404 match(Set dst (MoveL2D src)); 11405 effect(DEF dst, USE src); 11406 11407 ins_cost(200); 11408 format %{ "MOV $dst,$src.lo\n\t" 11409 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11410 opcode(0x89, 0x89); 11411 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 11412 ins_pipe( ialu_mem_long_reg ); 11413 %} 11414 11415 11416 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11417 predicate(UseSSE<=1); 11418 match(Set dst (MoveL2D src)); 11419 effect(DEF dst, USE src); 11420 ins_cost(125); 11421 11422 format %{ "FLD_D $src\n\t" 11423 "FSTP $dst\t# MoveL2D_stack_reg" %} 11424 opcode(0xDD); /* DD /0, FLD m64real */ 11425 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11426 Pop_Reg_DPR(dst) ); 11427 ins_pipe( fpu_reg_mem ); 11428 %} 11429 11430 11431 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11432 predicate(UseSSE>=2 && 
UseXmmLoadAndClearUpper); 11433 match(Set dst (MoveL2D src)); 11434 effect(DEF dst, USE src); 11435 11436 ins_cost(95); 11437 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11438 ins_encode %{ 11439 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11440 %} 11441 ins_pipe( pipe_slow ); 11442 %} 11443 11444 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11445 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11446 match(Set dst (MoveL2D src)); 11447 effect(DEF dst, USE src); 11448 11449 ins_cost(95); 11450 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11451 ins_encode %{ 11452 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11453 %} 11454 ins_pipe( pipe_slow ); 11455 %} 11456 11457 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11458 predicate(UseSSE>=2); 11459 match(Set dst (MoveL2D src)); 11460 effect(TEMP dst, USE src, TEMP tmp); 11461 ins_cost(85); 11462 format %{ "MOVD $dst,$src.lo\n\t" 11463 "MOVD $tmp,$src.hi\n\t" 11464 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11465 ins_encode %{ 11466 __ movdl($dst$$XMMRegister, $src$$Register); 11467 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11468 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11469 %} 11470 ins_pipe( pipe_slow ); 11471 %} 11472 11473 //----------------------------- CompressBits/ExpandBits ------------------------ 11474 11475 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ 11476 predicate(n->bottom_type()->isa_long()); 11477 match(Set dst (CompressBits src mask)); 11478 effect(TEMP rtmp, TEMP xtmp, KILL cr); 11479 format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %} 11480 ins_encode %{ 11481 Label exit, partail_result; 11482 // Parallely extract both upper and lower 32 bits of source into destination register pair. 
11483 // Merge the results of upper and lower destination registers such that upper destination 11484 // results are contiguously laid out after the lower destination result. 11485 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 11486 __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); 11487 __ popcntl($rtmp$$Register, $mask$$Register); 11488 // Skip merging if bit count of lower mask register is equal to 32 (register size). 11489 __ cmpl($rtmp$$Register, 32); 11490 __ jccb(Assembler::equal, exit); 11491 // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. 11492 __ movdl($xtmp$$XMMRegister, $rtmp$$Register); 11493 // Shift left the contents of upper destination register by true bit count of lower mask register 11494 // and merge with lower destination register. 11495 __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11496 __ orl($dst$$Register, $rtmp$$Register); 11497 __ movdl($rtmp$$Register, $xtmp$$XMMRegister); 11498 // Zero out upper destination register if true bit count of lower 32 bit mask is zero 11499 // since contents of upper destination have already been copied to lower destination 11500 // register. 11501 __ cmpl($rtmp$$Register, 0); 11502 __ jccb(Assembler::greater, partail_result); 11503 __ movl(HIGH_FROM_LOW($dst$$Register), 0); 11504 __ jmp(exit); 11505 __ bind(partail_result); 11506 // Perform right shift over upper destination register to move out bits already copied 11507 // to lower destination register. 
11508 __ subl($rtmp$$Register, 32); 11509 __ negl($rtmp$$Register); 11510 __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11511 __ bind(exit); 11512 %} 11513 ins_pipe( pipe_slow ); 11514 %} 11515 11516 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ 11517 predicate(n->bottom_type()->isa_long()); 11518 match(Set dst (ExpandBits src mask)); 11519 effect(TEMP rtmp, TEMP xtmp, KILL cr); 11520 format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %} 11521 ins_encode %{ 11522 // Extraction operation sequentially reads the bits from source register starting from LSB 11523 // and lays them out into destination register at bit locations corresponding to true bits 11524 // in mask register. Thus number of source bits read are equal to combined true bit count 11525 // of mask register pair. 11526 Label exit, mask_clipping; 11527 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 11528 __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); 11529 __ popcntl($rtmp$$Register, $mask$$Register); 11530 // If true bit count of lower mask register is 32 then none of bit of lower source register 11531 // will feed to upper destination register. 11532 __ cmpl($rtmp$$Register, 32); 11533 __ jccb(Assembler::equal, exit); 11534 // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. 11535 __ movdl($xtmp$$XMMRegister, $rtmp$$Register); 11536 // Shift right the contents of lower source register to remove already consumed bits. 11537 __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register); 11538 // Extract the bits from lower source register starting from LSB under the influence 11539 // of upper mask register. 
11540 __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register)); 11541 __ movdl($rtmp$$Register, $xtmp$$XMMRegister); 11542 __ subl($rtmp$$Register, 32); 11543 __ negl($rtmp$$Register); 11544 __ movdl($xtmp$$XMMRegister, $mask$$Register); 11545 __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register)); 11546 // Clear the set bits in upper mask register which have been used to extract the contents 11547 // from lower source register. 11548 __ bind(mask_clipping); 11549 __ blsrl($mask$$Register, $mask$$Register); 11550 __ decrementl($rtmp$$Register, 1); 11551 __ jccb(Assembler::greater, mask_clipping); 11552 // Starting from LSB extract the bits from upper source register under the influence of 11553 // remaining set bits in upper mask register. 11554 __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register); 11555 // Merge the partial results extracted from lower and upper source register bits. 11556 __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11557 __ movdl($mask$$Register, $xtmp$$XMMRegister); 11558 __ bind(exit); 11559 %} 11560 ins_pipe( pipe_slow ); 11561 %} 11562 11563 // ======================================================================= 11564 // fast clearing of an array 11565 // Small ClearArray non-AVX512. 
// Small (not statically known-large) ClearArray on AVX2-and-below hardware.
// cnt is in doublewords; the work is delegated to MacroAssembler::clear_mem
// with is_large=false and no opmask register (knoreg).
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV RDI,RAX\n\t"
       $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD 0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB 0x8,RCX\n\t"
       $$emit$$"JGE L_loop\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JL L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD 0x20,RAX\n\t"
       $$emit$$"SUB 0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JLE L_end\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ XMM0,(RAX)\n\t"
       $$emit$$"ADD 0x8,RAX\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"JGE L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Small ClearArray AVX512 non-constant length.
// Same as rep_stos but passes an opmask TEMP ($ktmp) for masked stores.
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV RDI,RAX\n\t"
       $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD 0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB 0x8,RCX\n\t"
       $$emit$$"JGE L_loop\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JL L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD 0x20,RAX\n\t"
       $$emit$$"SUB 0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JLE L_end\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ XMM0,(RAX)\n\t"
       $$emit$$"ADD 0x8,RAX\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"JGE L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Large ClearArray non-AVX512. Delegates with is_large=true (no small-size
// short-circuit path).
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
       $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD 0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB 0x8,RCX\n\t"
       $$emit$$"JGE L_loop\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JL L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD 0x20,RAX\n\t"
       $$emit$$"SUB 0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JLE L_end\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ XMM0,(RAX)\n\t"
       $$emit$$"ADD 0x8,RAX\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"JGE L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Large ClearArray AVX512.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
       $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD 0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB 0x8,RCX\n\t"
       $$emit$$"JGE L_loop\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JL L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD 0x20,RAX\n\t"
       $$emit$$"SUB 0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JLE L_end\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ XMM0,(RAX)\n\t"
       $$emit$$"ADD 0x8,RAX\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"JGE L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Small ClearArray AVX512 constant length. Uses the clear_mem overload that
// takes the count as an immediate ($cnt$$constant).
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
            ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// String.compareTo intrinsics. Each LL/UU/LU/UL encoding has a plain variant
// (knoreg) and an AVX512VLBW variant that supplies an opmask TEMP.

instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// UL variants: note the (str2, str1) / (cnt2, cnt1) argument swap into
// string_compare, and the swapped register assignments (str1 in ESI).
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe( pipe_slow );
%}

instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}


// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
11974 __ string_indexof($str1$$Register, $str2$$Register, 11975 $cnt1$$Register, $cnt2$$Register, 11976 icnt2, $result$$Register, 11977 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11978 } 11979 %} 11980 ins_pipe( pipe_slow ); 11981 %} 11982 11983 // fast search of substring with known size. 11984 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11985 eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11986 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11987 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11988 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11989 11990 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %} 11991 ins_encode %{ 11992 int icnt2 = (int)$int_cnt2$$constant; 11993 if (icnt2 >= 8) { 11994 // IndexOf for constant substrings with size >= 8 elements 11995 // which don't need to be loaded through stack. 11996 __ string_indexofC8($str1$$Register, $str2$$Register, 11997 $cnt1$$Register, $cnt2$$Register, 11998 icnt2, $result$$Register, 11999 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 12000 } else { 12001 // Small strings are loaded through stack if they cross page boundary. 12002 __ string_indexof($str1$$Register, $str2$$Register, 12003 $cnt1$$Register, $cnt2$$Register, 12004 icnt2, $result$$Register, 12005 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 12006 } 12007 %} 12008 ins_pipe( pipe_slow ); 12009 %} 12010 12011 // fast search of substring with known size. 
// UL (char[] haystack, byte[] needle): same shape as the UU variant, with
// the 8-element threshold for the no-stack fast path.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// StrIndexOf with a runtime (non-constant) needle length: the (-1) sentinel
// tells the stub that cnt2 must be read from its register.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// Runtime-length StrIndexOf, UU encoding (char[] in char[]).
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// Runtime-length StrIndexOf, UL encoding.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Search for a single char in a UTF-16 string (encoding U).
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                             eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Search for a single char in a Latin-1 string (encoding L).
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// fast array equals
// byte[] variant (AryEq encoding LL), non-AVX512 path: arrays_equals is
// called with is_array_equ=true and is_char=false.
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// byte[] variant, AVX512vlbw path (TEMP opmask register).
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// char[] variant (AryEq encoding UU), non-AVX512 path: is_char=true.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// char[] variant, AVX512vlbw path.
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// CountPositives, generic path.  Note the predicate is the logical complement
// of the _evex rule below: this one is used unless BOTH avx512vlbw and bmi2
// are available; it passes knoreg twice (no opmask registers).
instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                         regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// CountPositives, AVX512vlbw+BMI2 path: two TEMP opmask registers.
instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
                              regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}


// fast char[] to byte[] compression
// Generic path (used unless both avx512vlbw and bmi2 are available).
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                         regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX512vlbw+BMI2 compression path: two TEMP opmask registers.
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                              regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
// Generic path; StrInflatedCopy produces no value (Set dummy).
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX512vlbw+BMI2 inflation path: one TEMP opmask register.
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                             regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
// The trailing 'false' selects ISO-8859-1 (not ASCII) in encode_iso_array.
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ASCII
// Same stub as above with ascii=true; selected when the node is_ascii().
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                            regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                            eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
// Signed register-register compare; sets eFlagsReg for signed branches.
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Signed compare against an immediate; OpcSErm/Con8or32 pick the short
// (sign-extended imm8) or long (imm32) encoding.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST reg,reg (shorter than CMP reg,0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & con) compared to zero folded into a single TEST reg,imm.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & mem) compared to zero folded into TEST reg,mem.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  // Restrict to non-relocatable (raw) pointer loads.
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7); /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
// 0xCC is a dummy opcode; the real code comes from the min_enc encoding.
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
// As with MIN, 0xCC is a placeholder; max_enc emits the actual code.
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
// Computes limit = init + stride * ((limit - init + stride - 1) / stride)
// using 64-bit arithmetic in the EAX:EDX pair (hence the fixed limit/limit_hi
// register operands).  stride is a compile-time constant and never +/-1.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): m1 is computed but never used below -- candidate for removal.
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      // Negative stride: round toward zero on a negated dividend.
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index), noreg);
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5); // opcode + rel32: always the long form (see "Always long jump")
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6); // 0F 8x + rel32: always the long form
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Same as jmpLoopEndU but for the carry-flag-only (UCF) comparison; the
// lower ins_cost makes the matcher prefer it when it applies.
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// UCF2 variant: handles the unordered (parity) case explicitly.  For ne the
// parity jump also goes to the target; for eq an extra short branch skips
// the target jump when parity is set.  Any other condition is a bug.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
// Slow-path secondary-supers scan; fixed register assignments (EDI/ESI/EAX/ECX)
// match the enc_PartialSubtypeCheck encoding class.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant matched when the result is only compared against null: the flags
// from the scan are used directly, so no XOR of EDI is needed (opcode 0x0).
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);  // opcode + rel8
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short variant of jmpConUCF2: two short branches (parity + condition).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);  // two short Jcc's, 2 bytes each
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Produces -1/0/+1 in $dst for src1 <cmp> src2 via a fully-branchy sequence:
// compare high words first (signed), then low words (unsigned).
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // High halves: signed compare decides unless they are equal.
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // High halves equal: low halves compare UNSIGNED (below, not less).
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Against zero only the sign of the high word matters: TEST hi,hi.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// CMP lo / SBB hi leaves sign+overflow flags as if the full 64-bit subtraction
// had been performed — valid for LT/GE tests only.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-compare variants simply expand to the signed-layout instructions;
// the condition code carried in cmpOpU selects the unsigned Jcc/CMOVcc form.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LTGE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 version).
// FIX(review): the UseSSE test and the lt/ge test were combined as
// "UseSSE<=1 && lt || ge" — '&&' binds tighter than '||', so the ge case
// matched regardless of UseSSE, unlike the parenthesized integer/pointer
// cmov rules above.  Parenthesize so the UseSSE guard covers both tests.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 version).
// Same parenthesization fix as cmovDDPR_reg_LTGE above.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 version).
// Same parenthesization fix as cmovDDPR_reg_LTGE above.
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE version).
// Same parenthesization fix as cmovDDPR_reg_LTGE above.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Zero test: OR the two halves together; ZF reflects the whole 64-bit value.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Compare low halves; if equal, the high-half compare decides.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Zero test: OR the two halves together; ZF reflects the whole 64-bit value.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Compare low halves; if equal, the high-half compare decides.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Conditional move of a long (both halves) based on an EQ/NE long compare.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant: conditionally load both halves of the long.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-compare variants expand to the signed-layout instructions; the
// condition code carried in cmpOpU selects the unsigned CMOVcc form.
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_EQNE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_EQNE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_EQNE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 version).
// FIX(review): the UseSSE test and the eq/ne test were combined as
// "UseSSE<=1 && eq || ne" — '&&' binds tighter than '||', so the ne case
// matched regardless of UseSSE, unlike the parenthesized integer/pointer
// cmov rules above.  Parenthesize so the UseSSE guard covers both tests.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 version).
// Same parenthesization fix as cmovDDPR_reg_EQNE above.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 version).
// Same parenthesization fix as cmovDDPR_reg_EQNE above.
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE version).
// Same parenthesization fix as cmovDDPR_reg_EQNE above.
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned wrapper for cmovLL_reg_LEGT: identical encoding, expands to the
// signed rule; only the operand/condition-code types differ.
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LEGT(cmp, flags, dst, src);
  %}
%}

// Unsigned wrapper for cmovLL_mem_LEGT.
instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
// LE/GT forms only: those are what the commuted long-compare flags support.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same with a memory source operand.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned wrappers for the two rules above.
instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
// Unsigned wrapper for cmovPP_reg_LEGT: same encoding, expands to the signed rule.
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LEGT(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 form, UseSSE<=1).
// FIX: parenthesized the le/gt disjunction.  '&&' binds tighter than '||',
// so the UseSSE guard previously applied only to the 'le' arm; every sibling
// rule (e.g. cmovPP_reg_LEGT above) uses the parenthesized form.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM form, UseSSE>=2).
// FIX: parenthesized as above so the UseSSE guard covers both test kinds.
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, UseSSE==0).
// FIX: parenthesized as above.
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


// Compare 2 longs and CMOVE floats (XMM form, UseSSE>=1).
// FIX: parenthesized as above.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
// Dynamic (virtual/interface) call: loads the sentinel (oop)-1 into EAX so the
// inline-cache machinery can patch it, then emits the direct CALL (E8 cd).
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that neither takes nor returns floating point, so the x87
// stack is not flushed (no FFree_Float_Stack_All / post_call_FPU).
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
// Indirect JMP (opcode FF /4) through jump_target; EBX carries the Method*.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
  match(TailCall jump_target method_ptr);
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
// EAX carries the exception oop; the popped return address is discarded.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// RTM (transactional) fast lock; selected when -XX:+UseRTMLocking is active.
// Kills box and all four temps; result left in the flags register.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM fast lock: same macro-assembler entry with the RTM arguments nulled.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

// Fast unlock; box must be in EAX, tmp is clobbered.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast a GPR-held mask into an opmask register, vector length <= 32.
// NOTE(review): instruct name says LT32 but the predicate is '<= 32' and the
// format string says LE32 — the name is the misleading one; left unchanged
// since other code may reference it.
instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// Vector length > 32: needs a second opmask register as scratch.
instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// Same as above but with a 32-bit GPR source.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Safepoint Instruction
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    // NOTE(review): post_pc is captured but never checked here — presumably a
    // debug leftover; the guarantee below only validates the opcode byte.
    address post_pc = __ pc();
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...]
);
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Live rule: a load from the slot just stored to is replaced by re-emitting
// the store (the loaded value is already in the stored register).
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.