//
// Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

// Integer registers.  The encoding field is the hardware register number
// (the 3-bit value placed in ModRM/opcode bytes).
reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
reg_class ebpd_reg( EBP,EDI );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
// Relocation formats for 32-bit immediates and displacements embedded in code.
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x)     (as_Register((x)->encoding()+2))
#define HIGH_FROM_LOW_ENC(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// No dynamic register-mask setup is needed on x86_32.
void reg_mask_init() {}

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Copies the pair (lo, hi) into the first 16-byte-aligned slot at or below
// 'adr' and returns that aligned address.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
// Pool sized for four 128-bit masks plus one extra 128 bits so that
// double_quadword() can align each mask downward without underflowing.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call for FPU/AVX state resets:
// 6 bytes for an fldcw when the method runs in 24-bit FP mode, plus 3 bytes
// for a vzeroupper when the CPU supports it.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size in bytes of the FFree_Float_Stack_All sequence; set when it is first
// emitted (-1 until then), and asserted below before use.
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1;                      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5;                      // skip MOV instruction
  current_offset += 1;                      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Pack a ModRM-style byte: mod (f1) in bits 7:6, reg (f2) in bits 5:3,
// r/m (f3) in bits 2:0.  Also used to build SIB bytes (scale/index/base).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with a condition-code field OR'd in.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
                    int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
                    int format) {
#ifdef ASSERT
  // 0 and the non-oop sentinel are legitimate non-oop values for oop relocs.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store.
// Emits opcode + [ESP+disp] addressing, using the short 8-bit displacement
// form when disp fits in a signed byte.  ESP-based addressing always needs
// a SIB byte (r/m == ESP_enc selects SIB; SIB 0x24 means base=ESP, no index).
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emits the ModRM/SIB/displacement bytes for a register+memory operand.
// index == 0x4 encodes "no index" (ESP cannot be an index register);
// base == -1 is a special flag meaning an absolute 32-bit address.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Register-to-register move (MOV r32, r/m32, opcode 0x8B); elided entirely
// when source and destination encodings are identical.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Post-compare flag fixup so that NaN operands compare as 'less than'.
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst':
// -1 for unordered or below, 0 for equal, 1 for above.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);   // unordered -> -1
  __ jcc(Assembler::below, done);    // less      -> -1
  __ setb(Assembler::notEqual, dst); // equal -> 0, greater -> 1
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

// The constant table base is addressed absolutely on x86_32, so this node
// emits no code at all.
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog for -XX:+PrintAssembly; mirrors the code shape
// produced by MachPrologNode::emit (via verified_entry).
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  // verified_entry() emits the actual frame setup (bang/push/sub).
  __ verified_entry(C);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog; mirrors the code emitted below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL rsp, poll_offset[thread] \n\t"
              "JA #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize (32-bit immediate)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize (8-bit immediate)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POPL EBP (see format() above)

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    MacroAssembler masm(&cbuf);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Real emission: route the poll's slow path through a shared stub.
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//============================================================================= 731 732 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack }; 733 static enum RC rc_class( OptoReg::Name reg ) { 734 735 if( !OptoReg::is_valid(reg) ) return rc_bad; 736 if (OptoReg::is_stack(reg)) return rc_stack; 737 738 VMReg r = OptoReg::as_VMReg(reg); 739 if (r->is_Register()) return rc_int; 740 if (r->is_FloatRegister()) { 741 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 742 return rc_float; 743 } 744 if (r->is_KRegister()) return rc_kreg; 745 assert(r->is_XMMRegister(), "must be"); 746 return rc_xmm; 747 } 748 749 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, 750 int opcode, const char *op_str, int size, outputStream* st ) { 751 if( cbuf ) { 752 emit_opcode (*cbuf, opcode ); 753 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); 754 #ifndef PRODUCT 755 } else if( !do_size ) { 756 if( size != 0 ) st->print("\n\t"); 757 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 758 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 759 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 760 } else { // FLD, FST, PUSH, POP 761 st->print("%s [ESP + #%d]",op_str,offset); 762 } 763 #endif 764 } 765 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 766 return size+3+offset_size; 767 } 768 769 // Helper for XMM registers. Extra opcode bits, limited syntax. 
// XMM <-> stack spill move (single or double, depending on whether the
// lo/hi halves form an adjacent pair).  Emits, formats, or sizes the move.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Size accounting: with EVEX (UseAVX > 2) the compressed disp8*N form may
  // shrink a large offset to a single displacement byte.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM <-> XMM register copy (single or double).  Emits, formats, or sizes.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// GPR -> XMM copy via MOVD (32-bit only; no 64-bit int-float moves on x86_32).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// XMM -> GPR copy via MOVD (32-bit only).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer register-to-register MOV (opcode 0x8B: MOV r32, r/m32).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 FP-stack register to [ESP+offset].  If the source is not
// already FPR1L (top of stack), it is first FLD'd to the top and then
// stored-and-popped (FSTP); otherwise a plain FST leaves the stack intact.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // st_op's register number only selects the modrm reg field (FSTP vs FST);
  // EBX_num/EDX_num encode 3 and 2 respectively.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
947 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo, 948 int src_hi, int dst_hi, uint ireg, outputStream* st); 949 950 void vec_spill_helper(CodeBuffer *cbuf, bool is_load, 951 int stack_offset, int reg, uint ireg, outputStream* st); 952 953 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, 954 int dst_offset, uint ireg, outputStream* st) { 955 if (cbuf) { 956 MacroAssembler _masm(cbuf); 957 switch (ireg) { 958 case Op_VecS: 959 __ pushl(Address(rsp, src_offset)); 960 __ popl (Address(rsp, dst_offset)); 961 break; 962 case Op_VecD: 963 __ pushl(Address(rsp, src_offset)); 964 __ popl (Address(rsp, dst_offset)); 965 __ pushl(Address(rsp, src_offset+4)); 966 __ popl (Address(rsp, dst_offset+4)); 967 break; 968 case Op_VecX: 969 __ movdqu(Address(rsp, -16), xmm0); 970 __ movdqu(xmm0, Address(rsp, src_offset)); 971 __ movdqu(Address(rsp, dst_offset), xmm0); 972 __ movdqu(xmm0, Address(rsp, -16)); 973 break; 974 case Op_VecY: 975 __ vmovdqu(Address(rsp, -32), xmm0); 976 __ vmovdqu(xmm0, Address(rsp, src_offset)); 977 __ vmovdqu(Address(rsp, dst_offset), xmm0); 978 __ vmovdqu(xmm0, Address(rsp, -32)); 979 break; 980 case Op_VecZ: 981 __ evmovdquq(Address(rsp, -64), xmm0, 2); 982 __ evmovdquq(xmm0, Address(rsp, src_offset), 2); 983 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); 984 __ evmovdquq(xmm0, Address(rsp, -64), 2); 985 break; 986 default: 987 ShouldNotReachHere(); 988 } 989 #ifndef PRODUCT 990 } else { 991 switch (ireg) { 992 case Op_VecS: 993 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 994 "popl [rsp + #%d]", 995 src_offset, dst_offset); 996 break; 997 case Op_VecD: 998 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 999 "popq [rsp + #%d]\n\t" 1000 "pushl [rsp + #%d]\n\t" 1001 "popq [rsp + #%d]", 1002 src_offset, dst_offset, src_offset+4, dst_offset+4); 1003 break; 1004 case Op_VecX: 1005 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1006 "movdqu xmm0, [rsp + #%d]\n\t" 1007 
"movdqu [rsp + #%d], xmm0\n\t" 1008 "movdqu xmm0, [rsp - #16]", 1009 src_offset, dst_offset); 1010 break; 1011 case Op_VecY: 1012 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1013 "vmovdqu xmm0, [rsp + #%d]\n\t" 1014 "vmovdqu [rsp + #%d], xmm0\n\t" 1015 "vmovdqu xmm0, [rsp - #32]", 1016 src_offset, dst_offset); 1017 break; 1018 case Op_VecZ: 1019 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1020 "vmovdqu xmm0, [rsp + #%d]\n\t" 1021 "vmovdqu [rsp + #%d], xmm0\n\t" 1022 "vmovdqu xmm0, [rsp - #64]", 1023 src_offset, dst_offset); 1024 break; 1025 default: 1026 ShouldNotReachHere(); 1027 } 1028 #endif 1029 } 1030 } 1031 1032 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1033 // Get registers to move 1034 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1035 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1036 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1037 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1038 1039 enum RC src_second_rc = rc_class(src_second); 1040 enum RC src_first_rc = rc_class(src_first); 1041 enum RC dst_second_rc = rc_class(dst_second); 1042 enum RC dst_first_rc = rc_class(dst_first); 1043 1044 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1045 1046 // Generate spill code! 
1047 int size = 0; 1048 1049 if( src_first == dst_first && src_second == dst_second ) 1050 return size; // Self copy, no move 1051 1052 if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) { 1053 uint ireg = ideal_reg(); 1054 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1055 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1056 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1057 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1058 // mem -> mem 1059 int src_offset = ra_->reg2offset(src_first); 1060 int dst_offset = ra_->reg2offset(dst_first); 1061 vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); 1062 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1063 vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st); 1064 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1065 int stack_offset = ra_->reg2offset(dst_first); 1066 vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st); 1067 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1068 int stack_offset = ra_->reg2offset(src_first); 1069 vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st); 1070 } else { 1071 ShouldNotReachHere(); 1072 } 1073 return 0; 1074 } 1075 1076 // -------------------------------------- 1077 // Check for mem-mem move. push/pop to move. 
1078 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1079 if( src_second == dst_first ) { // overlapping stack copy ranges 1080 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1081 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1082 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1083 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1084 } 1085 // move low bits 1086 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1087 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1088 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1089 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1090 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1091 } 1092 return size; 1093 } 1094 1095 // -------------------------------------- 1096 // Check for integer reg-reg copy 1097 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1098 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1099 1100 // Check for integer store 1101 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1102 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1103 1104 // Check for integer load 1105 if( src_first_rc == rc_stack && dst_first_rc == rc_int ) 1106 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1107 1108 // Check for integer reg-xmm reg copy 1109 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1110 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1111 "no 64 bit integer-float reg moves" ); 1112 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1113 } 1114 // -------------------------------------- 1115 // Check for float reg-reg copy 1116 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1117 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1118 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1119 if( cbuf ) { 1120 1121 // Note the mucking with the register encode to compensate for the 0/1 1122 // indexing issue mentioned in a comment in the reg_def sections 1123 // for FPR registers many lines above here. 1124 1125 if( src_first != FPR1L_num ) { 1126 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1127 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1128 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1129 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1130 } else { 1131 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1132 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1133 } 1134 #ifndef PRODUCT 1135 } else if( !do_size ) { 1136 if( size != 0 ) st->print("\n\t"); 1137 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1138 else st->print( "FST %s", Matcher::regName[dst_first]); 1139 #endif 1140 } 1141 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1142 } 1143 1144 // Check for float store 1145 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1146 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1147 } 1148 1149 // Check for float load 1150 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1151 int offset = ra_->reg2offset(src_first); 1152 const char *op_str; 1153 int op; 1154 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1155 op_str = "FLD_D"; 1156 op = 0xDD; 1157 } else { // 32-bit load 1158 op_str = "FLD_S"; 1159 op = 0xD9; 1160 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1161 } 1162 if( cbuf ) { 1163 emit_opcode (*cbuf, op ); 1164 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1165 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1166 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1167 #ifndef PRODUCT 1168 } else if( !do_size ) { 1169 if( size != 0 ) st->print("\n\t"); 1170 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1171 #endif 1172 } 1173 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1174 return size + 3+offset_size+2; 1175 } 1176 1177 // Check for xmm reg-reg copy 1178 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1179 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1180 (src_first+1 == src_second && dst_first+1 == dst_second), 1181 "no non-adjacent float-moves" ); 1182 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1183 } 1184 1185 // Check for xmm reg-integer reg copy 1186 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1187 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1188 "no 64 bit float-integer reg moves" ); 1189 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1190 } 1191 1192 // Check for xmm store 1193 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1194 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st); 1195 } 1196 1197 // Check for float xmm load 1198 if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1199 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1200 } 1201 1202 // Copy from float reg to xmm reg 1203 if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) { 1204 
// copy to the top of stack from floating point reg 1205 // and use LEA to preserve flags 1206 if( cbuf ) { 1207 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1208 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1209 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1210 emit_d8(*cbuf,0xF8); 1211 #ifndef PRODUCT 1212 } else if( !do_size ) { 1213 if( size != 0 ) st->print("\n\t"); 1214 st->print("LEA ESP,[ESP-8]"); 1215 #endif 1216 } 1217 size += 4; 1218 1219 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1220 1221 // Copy from the temp memory to the xmm reg. 1222 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1223 1224 if( cbuf ) { 1225 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1226 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1227 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1228 emit_d8(*cbuf,0x08); 1229 #ifndef PRODUCT 1230 } else if( !do_size ) { 1231 if( size != 0 ) st->print("\n\t"); 1232 st->print("LEA ESP,[ESP+8]"); 1233 #endif 1234 } 1235 size += 4; 1236 return size; 1237 } 1238 1239 // AVX-512 opmask specific spilling. 
1240 if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) { 1241 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1242 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1243 MacroAssembler _masm(cbuf); 1244 int offset = ra_->reg2offset(src_first); 1245 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); 1246 return 0; 1247 } 1248 1249 if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) { 1250 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1251 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1252 MacroAssembler _masm(cbuf); 1253 int offset = ra_->reg2offset(dst_first); 1254 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first])); 1255 return 0; 1256 } 1257 1258 if (src_first_rc == rc_kreg && dst_first_rc == rc_int) { 1259 Unimplemented(); 1260 return 0; 1261 } 1262 1263 if (src_first_rc == rc_int && dst_first_rc == rc_kreg) { 1264 Unimplemented(); 1265 return 0; 1266 } 1267 1268 if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) { 1269 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1270 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1271 MacroAssembler _masm(cbuf); 1272 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first])); 1273 return 0; 1274 } 1275 1276 assert( size > 0, "missed a case" ); 1277 1278 // -------------------------------------------------------------------- 1279 // Check for second bits still needing moving. 
1280 if( src_second == dst_second ) 1281 return size; // Self copy; no move 1282 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1283 1284 // Check for second word int-int move 1285 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1286 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1287 1288 // Check for second word integer store 1289 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1290 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1291 1292 // Check for second word integer load 1293 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1294 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1295 1296 Unimplemented(); 1297 return 0; // Mute compiler 1298 } 1299 1300 #ifndef PRODUCT 1301 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1302 implementation( NULL, ra_, false, st ); 1303 } 1304 #endif 1305 1306 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1307 implementation( &cbuf, ra_, false, NULL ); 1308 } 1309 1310 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1311 return MachNode::size(ra_); 1312 } 1313 1314 1315 //============================================================================= 1316 #ifndef PRODUCT 1317 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1318 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1319 int reg = ra_->get_reg_first(this); 1320 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1321 } 1322 #endif 1323 1324 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1325 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1326 int reg = ra_->get_encode(this); 1327 if( offset >= 128 ) { 1328 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1329 emit_rm(cbuf, 0x2, reg, 0x04); 1330 emit_rm(cbuf, 
0x0, 0x04, ESP_enc); 1331 emit_d32(cbuf, offset); 1332 } 1333 else { 1334 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1335 emit_rm(cbuf, 0x1, reg, 0x04); 1336 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1337 emit_d8(cbuf, offset); 1338 } 1339 } 1340 1341 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1342 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1343 if( offset >= 128 ) { 1344 return 7; 1345 } 1346 else { 1347 return 4; 1348 } 1349 } 1350 1351 //============================================================================= 1352 #ifndef PRODUCT 1353 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1354 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1355 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1356 st->print_cr("\tNOP"); 1357 st->print_cr("\tNOP"); 1358 if( !OptoBreakpoint ) 1359 st->print_cr("\tNOP"); 1360 } 1361 #endif 1362 1363 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1364 MacroAssembler masm(&cbuf); 1365 #ifdef ASSERT 1366 uint insts_size = cbuf.insts_size(); 1367 #endif 1368 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1369 masm.jump_cc(Assembler::notEqual, 1370 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1371 /* WARNING these NOPs are critical so that verified entry point is properly 1372 aligned for patching by NativeJump::patch_verified_entry() */ 1373 int nops_cnt = 2; 1374 if( !OptoBreakpoint ) // Leave space for int3 1375 nops_cnt += 1; 1376 masm.nop(nops_cnt); 1377 1378 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1379 } 1380 1381 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1382 return OptoBreakpoint ? 11 : 12; 1383 } 1384 1385 1386 //============================================================================= 1387 1388 // Vector calling convention not supported. 
1389 const bool Matcher::supports_vector_calling_convention() { 1390 return false; 1391 } 1392 1393 OptoRegPair Matcher::vector_return_value(uint ideal_reg) { 1394 Unimplemented(); 1395 return OptoRegPair(0, 0); 1396 } 1397 1398 // Is this branch offset short enough that a short branch can be used? 1399 // 1400 // NOTE: If the platform does not provide any short branch variants, then 1401 // this method should return false for offset 0. 1402 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1403 // The passed offset is relative to address of the branch. 1404 // On 86 a branch displacement is calculated relative to address 1405 // of a next instruction. 1406 offset -= br_size; 1407 1408 // the short version of jmpConUCF2 contains multiple branches, 1409 // making the reach slightly less 1410 if (rule == jmpConUCF2_rule) 1411 return (-126 <= offset && offset <= 125); 1412 return (-128 <= offset && offset <= 127); 1413 } 1414 1415 // Return whether or not this register is ever used as an argument. This 1416 // function is used on startup to build the trampoline stubs in generateOptoStub. 1417 // Registers not mentioned will be killed by the VM call in the trampoline, and 1418 // arguments in those registers not be available to the callee. 1419 bool Matcher::can_be_java_arg( int reg ) { 1420 if( reg == ECX_num || reg == EDX_num ) return true; 1421 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; 1422 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; 1423 return false; 1424 } 1425 1426 bool Matcher::is_spillable_arg( int reg ) { 1427 return can_be_java_arg(reg); 1428 } 1429 1430 uint Matcher::int_pressure_limit() 1431 { 1432 return (INTPRESSURE == -1) ? 6 : INTPRESSURE; 1433 } 1434 1435 uint Matcher::float_pressure_limit() 1436 { 1437 return (FLOATPRESSURE == -1) ? 
6 : FLOATPRESSURE; 1438 } 1439 1440 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { 1441 // Use hardware integer DIV instruction when 1442 // it is faster than a code which use multiply. 1443 // Only when constant divisor fits into 32 bit 1444 // (min_jint is excluded to get only correct 1445 // positive 32 bit values from negative). 1446 return VM_Version::has_fast_idiv() && 1447 (divisor == (int)divisor && divisor != min_jint); 1448 } 1449 1450 // Register for DIVI projection of divmodI 1451 RegMask Matcher::divI_proj_mask() { 1452 return EAX_REG_mask(); 1453 } 1454 1455 // Register for MODI projection of divmodI 1456 RegMask Matcher::modI_proj_mask() { 1457 return EDX_REG_mask(); 1458 } 1459 1460 // Register for DIVL projection of divmodL 1461 RegMask Matcher::divL_proj_mask() { 1462 ShouldNotReachHere(); 1463 return RegMask(); 1464 } 1465 1466 // Register for MODL projection of divmodL 1467 RegMask Matcher::modL_proj_mask() { 1468 ShouldNotReachHere(); 1469 return RegMask(); 1470 } 1471 1472 const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1473 return NO_REG_mask(); 1474 } 1475 1476 // Returns true if the high 32 bits of the value is known to be zero. 1477 bool is_operand_hi32_zero(Node* n) { 1478 int opc = n->Opcode(); 1479 if (opc == Op_AndL) { 1480 Node* o2 = n->in(2); 1481 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1482 return true; 1483 } 1484 } 1485 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1486 return true; 1487 } 1488 return false; 1489 } 1490 1491 %} 1492 1493 //----------ENCODING BLOCK----------------------------------------------------- 1494 // This block specifies the encoding classes used by the compiler to output 1495 // byte streams. Encoding classes generate functions which are called by 1496 // Machine Instruction Nodes in order to generate the bit encoding of the 1497 // instruction. Operands specify their base encoding interface with the 1498 // interface keyword. 
Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.  MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.
In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix (selects 16-bit operands).
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // Emit a register-register modrm byte (mod=0b11).
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Emit an opcode byte followed by a register-register modrm byte.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32, 0 -- register number folded into the 0xB8+rd opcode.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( cbuf, 0x0 ); // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    // normal case special case
    //
    // input : rax,: dividend min_int
    // reg: divisor -1
    //
    // output: rax,: quotient (= rax, idiv reg) min_int
    // rdx: remainder (= rax, irem reg) 0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80 cmp rax,80000000h
    // 0F 85 0B 00 00 00 jne normal_case
    // 33 D2 xor rdx,edx
    // 83 F9 FF cmp rcx,0FFh
    // 0F 84 03 00 00 00 je done
    // normal_case:
    // 99 cdq
    // F7 F9 idiv rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
    // normal_case:
    emit_opcode(cbuf,0x99); // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02); }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  // Emit an 8-bit immediate when it fits, otherwise a full 32-bit immediate
  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Low word of a long immediate operation (opcode + r/m + immediate)
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // High word of a long immediate operation (opcode + r/m + immediate)
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  enc_class OpcSReg (rRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-swap a long: bswap both halves, then exchange them
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW_ENC(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
__ ffree(0);
      } else if (rt == T_FLOAT) {
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}

  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.insts_size());
    if (ra_->C->in_24_bit_fp_mode()) {
      MacroAssembler _masm(&cbuf);
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    MacroAssembler _masm(&cbuf);
    __ vzeroupper();
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}

  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    $$$emit8$primary;

    if (!_method) {
      // Runtime call target (no Java method): plain runtime relocation.
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_IMM32);
      __ post_call_nop();
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     rspec, RELOC_DISP32);
      __ post_call_nop();
      address mark = cbuf.insts_mark();
      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
        // Calls of the same statically bound method can share
        // a stub to the interpreter.
        cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
      } else {
        // Emit stubs for static call.
        address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
        if (stub == NULL) {
          ciEnv::current()->record_failure("CodeCache is full");
          return;
        }
      }
    }
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
    __ post_call_nop();
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);                        // Displacement
    __ post_call_nop();
  %}

  //   Following encoding is no longer used, but may be restored if calling
  //   convention changes significantly.
  //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
  //     // int ic_reg     = Matcher::inline_cache_reg();
  //     // int ic_encode  = Matcher::_regEncode[ic_reg];
  //     // int imo_reg    = Matcher::interpreter_method_reg();
  //     // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
  //     // // so we load it immediately before the call
  //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
  //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
  //
  //     // xor rbp,ebp
  //     emit_opcode(cbuf, 0x33);
  //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //     // CALL to interpreter.
  //     cbuf.set_insts_mark();
  //     $$$emit8$primary;
  //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
  //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
  //   %}

  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
  %}

  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
  %}

  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  enc_class lock_prefix( ) %{
    emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHGB [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB0);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // 16-bit mode
    emit_opcode(cbuf, 0x66);

    // CMPXCHGW [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize ZF!=0 as a boolean 0/1 in 'res'
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base,
index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Double-shift (SHLD/SHRD) for a long shifted by a 1..31 constant;
  // $tertiary selects direction (0xA4 == SHLD), which fixes operand order.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right-shift of a long by a 32..63 constant
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Shift of a long by a 32..63 constant, clearing the vacated half;
  // $secondary selects direction, which fixes which half is cleared.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free conditional add: p += (p < q) ? y : 0, via SBB mask
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    // SHL $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
  %}

  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
    emit_d8(cbuf, 0x1F );
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!!  equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );         // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1)
  %}

  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );         // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );         // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );         // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );         // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
2378 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2379 // Opcode already emitted 2380 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2381 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2382 emit_d32(cbuf, $dst$$disp); // Displacement 2383 %} 2384 2385 // Push the integer in stackSlot 'src' onto FP-stack 2386 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2387 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2388 %} 2389 2390 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2391 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2392 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2393 %} 2394 2395 // Same as Pop_Mem_F except for opcode 2396 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2397 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2398 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2399 %} 2400 2401 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2402 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2403 emit_d8( cbuf, 0xD8+$dst$$reg ); 2404 %} 2405 2406 enc_class Push_Reg_FPR( regFPR dst ) %{ 2407 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2408 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2409 %} 2410 2411 // Push FPU's float to a stack-slot, and pop FPU-stack 2412 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2413 int pop = 0x02; 2414 if ($src$$reg != FPR1L_enc) { 2415 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2416 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2417 pop = 0x03; 2418 } 2419 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2420 %} 2421 2422 // Push FPU's double to a stack-slot, and pop FPU-stack 2423 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2424 int pop = 0x02; 2425 if ($src$$reg != FPR1L_enc) { 2426 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2427 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2428 pop = 0x03; 2429 } 2430 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2431 %} 2432 2433 // Push FPU's double to 
a FPU-stack-slot, and pop FPU-stack 2434 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 2435 int pop = 0xD0 - 1; // -1 since we skip FLD 2436 if ($src$$reg != FPR1L_enc) { 2437 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 2438 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2439 pop = 0xD8; 2440 } 2441 emit_opcode( cbuf, 0xDD ); 2442 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) 2443 %} 2444 2445 2446 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 2447 // load dst in FPR0 2448 emit_opcode( cbuf, 0xD9 ); 2449 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2450 if ($src$$reg != FPR1L_enc) { 2451 // fincstp 2452 emit_opcode (cbuf, 0xD9); 2453 emit_opcode (cbuf, 0xF7); 2454 // swap src with FPR1: 2455 // FXCH FPR1 with src 2456 emit_opcode(cbuf, 0xD9); 2457 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2458 // fdecstp 2459 emit_opcode (cbuf, 0xD9); 2460 emit_opcode (cbuf, 0xF6); 2461 } 2462 %} 2463 2464 enc_class Push_ModD_encoding(regD src0, regD src1) %{ 2465 MacroAssembler _masm(&cbuf); 2466 __ subptr(rsp, 8); 2467 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2468 __ fld_d(Address(rsp, 0)); 2469 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2470 __ fld_d(Address(rsp, 0)); 2471 %} 2472 2473 enc_class Push_ModF_encoding(regF src0, regF src1) %{ 2474 MacroAssembler _masm(&cbuf); 2475 __ subptr(rsp, 4); 2476 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2477 __ fld_s(Address(rsp, 0)); 2478 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2479 __ fld_s(Address(rsp, 0)); 2480 %} 2481 2482 enc_class Push_ResultD(regD dst) %{ 2483 MacroAssembler _masm(&cbuf); 2484 __ fstp_d(Address(rsp, 0)); 2485 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2486 __ addptr(rsp, 8); 2487 %} 2488 2489 enc_class Push_ResultF(regF dst, immI d8) %{ 2490 MacroAssembler _masm(&cbuf); 2491 __ fstp_s(Address(rsp, 0)); 2492 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2493 __ addptr(rsp, $d8$$constant); 2494 %} 2495 2496 enc_class Push_SrcD(regD src) %{ 2497 MacroAssembler _masm(&cbuf); 2498 __ subptr(rsp, 8); 
// (tail of enc_class Push_SrcD: spill the XMM double into the scratch
// slot just reserved on the C stack, then load it onto the x87 stack)
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Reserve an 8-byte scratch slot on the C stack; paired with
  // pop_stack_temp_qword below.
  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  // Release the 8-byte scratch slot reserved by push_stack_temp_qword.
  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Copy an XMM double through the stack scratch slot onto the x87 stack
  // (it becomes the new top-of-stack).  Assumes the 8-byte slot was
  // already reserved (see push_stack_temp_qword).
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Bring the result in $src to the top of the FPU stack without copying:
  // temporarily bump the stack pointer (FINCSTP), exchange with $src
  // (FXCH), then restore it (FDECSTP).  No-op when $src is already FPR1.
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Copy the x87 status word into EFLAGS (FNSTSW AX + SAHF), then emit a
  // JNP that hops over the next 5 bytes when the compare was ordered
  // (parity clear).  The 5 skipped bytes are emitted by the instruction
  // that uses this encoding.
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  // Floating-point remainder.  FPREM only makes bounded progress per
  // step, so loop (backward 32-bit JP, rel32 = -12) until the status
  // word no longer reports a partial remainder.
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  // Transfer x87 compare results into EFLAGS, folding the "unordered"
  // outcome (status-word bit 0x0400) into the LT case.  (Continues on
  // the next source line with the 0x66 operand-size prefix for the
  // 16-bit TEST immediate.)
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf,
0x66 ); // operand-size prefix for 16-bit immediate 2576 emit_opcode( cbuf, 0xA9 ); 2577 emit_d16 ( cbuf, 0x0400 ); 2578 // // // This sequence works, but stalls for 12-16 cycles on PPro 2579 // // test rax,0x0400 2580 // emit_opcode( cbuf, 0xA9 ); 2581 // emit_d32 ( cbuf, 0x00000400 ); 2582 // 2583 // jz exit (no unordered comparison) 2584 emit_opcode( cbuf, 0x74 ); 2585 emit_d8 ( cbuf, 0x02 ); 2586 // mov ah,1 - treat as LT case (set carry flag) 2587 emit_opcode( cbuf, 0xB4 ); 2588 emit_d8 ( cbuf, 0x01 ); 2589 // sahf 2590 emit_opcode( cbuf, 0x9E); 2591 %} 2592 2593 enc_class cmpF_P6_fixup() %{ 2594 // Fixup the integer flags in case comparison involved a NaN 2595 // 2596 // JNP exit (no unordered comparison, P-flag is set by NaN) 2597 emit_opcode( cbuf, 0x7B ); 2598 emit_d8 ( cbuf, 0x03 ); 2599 // MOV AH,1 - treat as LT case (set carry flag) 2600 emit_opcode( cbuf, 0xB4 ); 2601 emit_d8 ( cbuf, 0x01 ); 2602 // SAHF 2603 emit_opcode( cbuf, 0x9E); 2604 // NOP // target for branch to avoid branch to branch 2605 emit_opcode( cbuf, 0x90); 2606 %} 2607 2608 // fnstsw_ax(); 2609 // sahf(); 2610 // movl(dst, nan_result); 2611 // jcc(Assembler::parity, exit); 2612 // movl(dst, less_result); 2613 // jcc(Assembler::below, exit); 2614 // movl(dst, equal_result); 2615 // jcc(Assembler::equal, exit); 2616 // movl(dst, greater_result); 2617 2618 // less_result = 1; 2619 // greater_result = -1; 2620 // equal_result = 0; 2621 // nan_result = -1; 2622 2623 enc_class CmpF_Result(rRegI dst) %{ 2624 // fnstsw_ax(); 2625 emit_opcode( cbuf, 0xDF); 2626 emit_opcode( cbuf, 0xE0); 2627 // sahf 2628 emit_opcode( cbuf, 0x9E); 2629 // movl(dst, nan_result); 2630 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2631 emit_d32( cbuf, -1 ); 2632 // jcc(Assembler::parity, exit); 2633 emit_opcode( cbuf, 0x7A ); 2634 emit_d8 ( cbuf, 0x13 ); 2635 // movl(dst, less_result); 2636 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2637 emit_d32( cbuf, -1 ); 2638 // jcc(Assembler::below, exit); 2639 emit_opcode( cbuf, 0x72 ); 
2640 emit_d8 ( cbuf, 0x0C ); 2641 // movl(dst, equal_result); 2642 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2643 emit_d32( cbuf, 0 ); 2644 // jcc(Assembler::equal, exit); 2645 emit_opcode( cbuf, 0x74 ); 2646 emit_d8 ( cbuf, 0x05 ); 2647 // movl(dst, greater_result); 2648 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2649 emit_d32( cbuf, 1 ); 2650 %} 2651 2652 2653 // Compare the longs and set flags 2654 // BROKEN! Do Not use as-is 2655 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2656 // CMP $src1.hi,$src2.hi 2657 emit_opcode( cbuf, 0x3B ); 2658 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) ); 2659 // JNE,s done 2660 emit_opcode(cbuf,0x75); 2661 emit_d8(cbuf, 2 ); 2662 // CMP $src1.lo,$src2.lo 2663 emit_opcode( cbuf, 0x3B ); 2664 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2665 // done: 2666 %} 2667 2668 enc_class convert_int_long( regL dst, rRegI src ) %{ 2669 // mov $dst.lo,$src 2670 int dst_encoding = $dst$$reg; 2671 int src_encoding = $src$$reg; 2672 encode_Copy( cbuf, dst_encoding , src_encoding ); 2673 // mov $dst.hi,$src 2674 encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding ); 2675 // sar $dst.hi,31 2676 emit_opcode( cbuf, 0xC1 ); 2677 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) ); 2678 emit_d8(cbuf, 0x1F ); 2679 %} 2680 2681 enc_class convert_long_double( eRegL src ) %{ 2682 // push $src.hi 2683 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg)); 2684 // push $src.lo 2685 emit_opcode(cbuf, 0x50+$src$$reg ); 2686 // fild 64-bits at [SP] 2687 emit_opcode(cbuf,0xdf); 2688 emit_d8(cbuf, 0x6C); 2689 emit_d8(cbuf, 0x24); 2690 emit_d8(cbuf, 0x00); 2691 // pop stack 2692 emit_opcode(cbuf, 0x83); // add SP, #8 2693 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2694 emit_d8(cbuf, 0x8); 2695 %} 2696 2697 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2698 // IMUL EDX:EAX,$src1 2699 emit_opcode( cbuf, 0xF7 ); 2700 emit_rm( cbuf, 0x3, 0x5, 
$src1$$reg ); 2701 // SAR EDX,$cnt-32 2702 int shift_count = ((int)$cnt$$constant) - 32; 2703 if (shift_count > 0) { 2704 emit_opcode(cbuf, 0xC1); 2705 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2706 emit_d8(cbuf, shift_count); 2707 } 2708 %} 2709 2710 // this version doesn't have add sp, 8 2711 enc_class convert_long_double2( eRegL src ) %{ 2712 // push $src.hi 2713 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg)); 2714 // push $src.lo 2715 emit_opcode(cbuf, 0x50+$src$$reg ); 2716 // fild 64-bits at [SP] 2717 emit_opcode(cbuf,0xdf); 2718 emit_d8(cbuf, 0x6C); 2719 emit_d8(cbuf, 0x24); 2720 emit_d8(cbuf, 0x00); 2721 %} 2722 2723 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2724 // Basic idea: long = (long)int * (long)int 2725 // IMUL EDX:EAX, src 2726 emit_opcode( cbuf, 0xF7 ); 2727 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2728 %} 2729 2730 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2731 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2732 // MUL EDX:EAX, src 2733 emit_opcode( cbuf, 0xF7 ); 2734 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2735 %} 2736 2737 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2738 // Basic idea: lo(result) = lo(x_lo * y_lo) 2739 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2740 // MOV $tmp,$src.lo 2741 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2742 // IMUL $tmp,EDX 2743 emit_opcode( cbuf, 0x0F ); 2744 emit_opcode( cbuf, 0xAF ); 2745 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2746 // MOV EDX,$src.hi 2747 encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) ); 2748 // IMUL EDX,EAX 2749 emit_opcode( cbuf, 0x0F ); 2750 emit_opcode( cbuf, 0xAF ); 2751 emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg ); 2752 // ADD $tmp,EDX 2753 emit_opcode( cbuf, 0x03 ); 2754 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2755 // MUL EDX:EAX,$src.lo 2756 emit_opcode( cbuf, 0xF7 ); 2757 emit_rm( cbuf, 0x3, 0x4, 
// (tail of enc_class long_multiply: close the MUL EDX:EAX,$src.lo, then
// fold the accumulated cross terms in $tmp into the high half)
$src$$reg );
    // ADD EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
  %}

  // Long multiply by a small constant (0..127):
  // the 8-bit-immediate IMUL (0x6B) computes the src*y_hi cross term
  // into $tmp, the widening MUL produces src*y_lo in EDX:EAX, and the
  // final ADD merges the cross term into the high half.
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}

  // 64-bit divide: push both operands (hi word first) and call the
  // SharedRuntime::ldiv helper; the four pushed words are released
  // afterwards with ADD ESP,16.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf, 0x50+$src1$$reg );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf, 0x50+$src2$$reg );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Restore stack
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // 64-bit remainder: identical calling sequence to long_div, but the
  // target is SharedRuntime::lrem (continues on the next source line).
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf, 0x50+$src1$$reg );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf, 0x50+$src2$$reg );
    // CALL directly to the
runtime 2812 MacroAssembler _masm(&cbuf); 2813 cbuf.set_insts_mark(); 2814 emit_opcode(cbuf,0xE8); // Call into runtime 2815 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2816 __ post_call_nop(); 2817 // Restore stack 2818 emit_opcode(cbuf, 0x83); // add SP, #framesize 2819 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2820 emit_d8(cbuf, 4*4); 2821 %} 2822 2823 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2824 // MOV $tmp,$src.lo 2825 emit_opcode(cbuf, 0x8B); 2826 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2827 // OR $tmp,$src.hi 2828 emit_opcode(cbuf, 0x0B); 2829 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg)); 2830 %} 2831 2832 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2833 // CMP $src1.lo,$src2.lo 2834 emit_opcode( cbuf, 0x3B ); 2835 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2836 // JNE,s skip 2837 emit_cc(cbuf, 0x70, 0x5); 2838 emit_d8(cbuf,2); 2839 // CMP $src1.hi,$src2.hi 2840 emit_opcode( cbuf, 0x3B ); 2841 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) ); 2842 %} 2843 2844 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2845 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2846 emit_opcode( cbuf, 0x3B ); 2847 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2848 // MOV $tmp,$src1.hi 2849 emit_opcode( cbuf, 0x8B ); 2850 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) ); 2851 // SBB $tmp,$src2.hi\t! 
Compute flags for long compare 2852 emit_opcode( cbuf, 0x1B ); 2853 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) ); 2854 %} 2855 2856 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2857 // XOR $tmp,$tmp 2858 emit_opcode(cbuf,0x33); // XOR 2859 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2860 // CMP $tmp,$src.lo 2861 emit_opcode( cbuf, 0x3B ); 2862 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2863 // SBB $tmp,$src.hi 2864 emit_opcode( cbuf, 0x1B ); 2865 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) ); 2866 %} 2867 2868 // Sniff, sniff... smells like Gnu Superoptimizer 2869 enc_class neg_long( eRegL dst ) %{ 2870 emit_opcode(cbuf,0xF7); // NEG hi 2871 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg)); 2872 emit_opcode(cbuf,0xF7); // NEG lo 2873 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2874 emit_opcode(cbuf,0x83); // SBB hi,0 2875 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg)); 2876 emit_d8 (cbuf,0 ); 2877 %} 2878 2879 enc_class enc_pop_rdx() %{ 2880 emit_opcode(cbuf,0x5A); 2881 %} 2882 2883 enc_class enc_rethrow() %{ 2884 MacroAssembler _masm(&cbuf); 2885 cbuf.set_insts_mark(); 2886 emit_opcode(cbuf, 0xE9); // jmp entry 2887 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2888 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2889 __ post_call_nop(); 2890 %} 2891 2892 2893 // Convert a double to an int. Java semantics require we do complex 2894 // manglelations in the corner cases. So we set the rounding mode to 2895 // 'zero', store the darned double down as an int, and reset the 2896 // rounding mode to 'nearest'. The hardware throws an exception which 2897 // patches up the correct value directly to the stack. 2898 enc_class DPR2I_encoding( regDPR src ) %{ 2899 // Flip to round-to-zero mode. We attempted to allow invalid-op 2900 // exceptions here, so that a NAN or other corner-case value will 2901 // thrown an exception (but normal values get converted at full speed). 
2902 // However, I2C adapters and other float-stack manglers leave pending 2903 // invalid-op exceptions hanging. We would have to clear them before 2904 // enabling them and that is more expensive than just testing for the 2905 // invalid value Intel stores down in the corner cases. 2906 emit_opcode(cbuf,0xD9); // FLDCW trunc 2907 emit_opcode(cbuf,0x2D); 2908 emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()); 2909 // Allocate a word 2910 emit_opcode(cbuf,0x83); // SUB ESP,4 2911 emit_opcode(cbuf,0xEC); 2912 emit_d8(cbuf,0x04); 2913 // Encoding assumes a double has been pushed into FPR0. 2914 // Store down the double as an int, popping the FPU stack 2915 emit_opcode(cbuf,0xDB); // FISTP [ESP] 2916 emit_opcode(cbuf,0x1C); 2917 emit_d8(cbuf,0x24); 2918 // Restore the rounding mode; mask the exception 2919 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2920 emit_opcode(cbuf,0x2D); 2921 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2922 ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24() 2923 : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std()); 2924 2925 // Load the converted int; adjust CPU stack 2926 emit_opcode(cbuf,0x58); // POP EAX 2927 emit_opcode(cbuf,0x3D); // CMP EAX,imm 2928 emit_d32 (cbuf,0x80000000); // 0x80000000 2929 emit_opcode(cbuf,0x75); // JNE around_slow_call 2930 emit_d8 (cbuf,0x07); // Size of slow_call 2931 // Push src onto stack slow-path 2932 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2933 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2934 // CALL directly to the runtime 2935 MacroAssembler _masm(&cbuf); 2936 cbuf.set_insts_mark(); 2937 emit_opcode(cbuf,0xE8); // Call into runtime 2938 emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2939 __ post_call_nop(); 2940 // Carry on here... 
2941 %} 2942 2943 enc_class DPR2L_encoding( regDPR src ) %{ 2944 emit_opcode(cbuf,0xD9); // FLDCW trunc 2945 emit_opcode(cbuf,0x2D); 2946 emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()); 2947 // Allocate a word 2948 emit_opcode(cbuf,0x83); // SUB ESP,8 2949 emit_opcode(cbuf,0xEC); 2950 emit_d8(cbuf,0x08); 2951 // Encoding assumes a double has been pushed into FPR0. 2952 // Store down the double as a long, popping the FPU stack 2953 emit_opcode(cbuf,0xDF); // FISTP [ESP] 2954 emit_opcode(cbuf,0x3C); 2955 emit_d8(cbuf,0x24); 2956 // Restore the rounding mode; mask the exception 2957 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2958 emit_opcode(cbuf,0x2D); 2959 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2960 ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24() 2961 : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std()); 2962 2963 // Load the converted int; adjust CPU stack 2964 emit_opcode(cbuf,0x58); // POP EAX 2965 emit_opcode(cbuf,0x5A); // POP EDX 2966 emit_opcode(cbuf,0x81); // CMP EDX,imm 2967 emit_d8 (cbuf,0xFA); // rdx 2968 emit_d32 (cbuf,0x80000000); // 0x80000000 2969 emit_opcode(cbuf,0x75); // JNE around_slow_call 2970 emit_d8 (cbuf,0x07+4); // Size of slow_call 2971 emit_opcode(cbuf,0x85); // TEST EAX,EAX 2972 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 2973 emit_opcode(cbuf,0x75); // JNE around_slow_call 2974 emit_d8 (cbuf,0x07); // Size of slow_call 2975 // Push src onto stack slow-path 2976 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2977 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2978 // CALL directly to the runtime 2979 MacroAssembler _masm(&cbuf); 2980 cbuf.set_insts_mark(); 2981 emit_opcode(cbuf,0xE8); // Call into runtime 2982 emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2983 __ post_call_nop(); 2984 // Carry on here... 
// (closes enc_class DPR2L_encoding)
%}

  // Multiply the x87 top-of-stack by FPU register $src1 (non-popping).
  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL ST,$src /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  // Add FPU register $src2 into the top of stack.
  // NOTE: D8 C0+i encodes the non-popping FADD ST,ST(i); the original
  // comment said "FADDP", which is the DE C0+i form mentioned below.
  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD ST,src2 /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    // could use FADDP src2,fpST /* DE C0+i */
  %}

  // Add the top of stack into $src2 and pop the x87 stack (FADDP).
  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP src2,ST /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  // Subtract $src1 from TOS, then divide TOS by $src2 (both non-popping).
  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB ST,$src1 /* D8 E0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV ST,$src2 /* D8 F0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  // Add $src1 into TOS, then multiply TOS by $src2 (both non-popping).
  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD ST,$src /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL ST,src2 /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // As MulFAddF, but the multiply pops: FMULP src2,ST.
  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD ST,$src /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP src2,ST /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long: FILD (DF /5) performs a single
  // 64-bit read from $mem; the value is then stored down to stack slot
  // $dst.  (Continues on the next source line with $mem->disp_reloc().)
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;   // /5 reg-field selects FILD m64int
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc =
$mem->disp_reloc(); // disp-as-oop when working with static globals 3050 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3051 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3052 %} 3053 3054 // Volatile Store Long. Must be atomic, so move it into 3055 // the FP TOS and then do a 64-bit FIST. Has to probe the 3056 // target address before the store (for null-ptr checks) 3057 // so the memory operand is used twice in the encoding. 3058 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3059 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3060 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3061 emit_opcode(cbuf,0xDF); 3062 int rm_byte_opcode = 0x07; 3063 int base = $mem$$base; 3064 int index = $mem$$index; 3065 int scale = $mem$$scale; 3066 int displace = $mem$$disp; 3067 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3068 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3069 %} 3070 3071 %} 3072 3073 3074 //----------FRAME-------------------------------------------------------------- 3075 // Definition of frame structure and management information. 3076 // 3077 // S T A C K L A Y O U T Allocators stack-slot number 3078 // | (to get allocators register number 3079 // G Owned by | | v add OptoReg::stack0()) 3080 // r CALLER | | 3081 // o | +--------+ pad to even-align allocators stack-slot 3082 // w V | pad0 | numbers; owned by CALLER 3083 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned 3084 // h ^ | in | 5 3085 // | | args | 4 Holes in incoming args owned by SELF 3086 // | | | | 3 3087 // | | +--------+ 3088 // V | | old out| Empty on Intel, window on Sparc 3089 // | old |preserve| Must be even aligned. 3090 // | SP-+--------+----> Matcher::_old_SP, even aligned 3091 // | | in | 3 area for Intel ret address 3092 // Owned by |preserve| Empty on Sparc. 
3093 // SELF +--------+ 3094 // | | pad2 | 2 pad to align old SP 3095 // | +--------+ 1 3096 // | | locks | 0 3097 // | +--------+----> OptoReg::stack0(), even aligned 3098 // | | pad1 | 11 pad to align new SP 3099 // | +--------+ 3100 // | | | 10 3101 // | | spills | 9 spills 3102 // V | | 8 (pad0 slot for callee) 3103 // -----------+--------+----> Matcher::_out_arg_limit, unaligned 3104 // ^ | out | 7 3105 // | | args | 6 Holes in outgoing args owned by CALLEE 3106 // Owned by +--------+ 3107 // CALLEE | new out| 6 Empty on Intel, window on Sparc 3108 // | new |preserve| Must be even-aligned. 3109 // | SP-+--------+----> Matcher::_new_SP, even aligned 3110 // | | | 3111 // 3112 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is 3113 // known from SELF's arguments and the Java calling convention. 3114 // Region 6-7 is determined per call site. 3115 // Note 2: If the calling convention leaves holes in the incoming argument 3116 // area, those holes are owned by SELF. Holes in the outgoing area 3117 // are owned by the CALLEE. Holes should not be necessary in the 3118 // incoming area, as the Java calling convention is completely under 3119 // the control of the AD file. Doubles can be sorted and packed to 3120 // avoid holes. Holes in the outgoing arguments may be necessary for 3121 // varargs C calling conventions. 3122 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is 3123 // even aligned with pad0 as needed. 3124 // Region 6 is even aligned. Region 6-7 is NOT even aligned; 3125 // region 6-11 is even aligned; it may be padded out more so that 3126 // the region from SP to FP meets the minimum stack alignment. 3127 3128 frame %{ 3129 // These three registers define part of the calling convention 3130 // between compiled code and the interpreter. 
// EAX carries the inline-cache token across the compiled-code /
  // interpreter boundary.
  inline_cache_reg(EAX);        // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3159 // Otherwise, it is above the locks and verification slot and alignment word 3160 return_addr(STACK - 1 + 3161 align_up((Compile::current()->in_preserve_stack_slots() + 3162 Compile::current()->fixed_slots()), 3163 stack_alignment_in_slots())); 3164 3165 // Location of C & interpreter return values 3166 c_return_value %{ 3167 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3168 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3169 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3170 3171 // in SSE2+ mode we want to keep the FPU stack clean so pretend 3172 // that C functions return float and double results in XMM0. 3173 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3174 return OptoRegPair(XMM0b_num,XMM0_num); 3175 if( ideal_reg == Op_RegF && UseSSE>=2 ) 3176 return OptoRegPair(OptoReg::Bad,XMM0_num); 3177 3178 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3179 %} 3180 3181 // Location of return values 3182 return_value %{ 3183 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3184 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3185 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3186 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3187 return OptoRegPair(XMM0b_num,XMM0_num); 3188 if( ideal_reg == Op_RegF && UseSSE>=1 ) 3189 return OptoRegPair(OptoReg::Bad,XMM0_num); 3190 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3191 %} 3192 3193 %} 3194 3195 //----------ATTRIBUTES--------------------------------------------------------- 3196 //----------Operand Attributes------------------------------------------------- 3197 op_attrib op_cost(0); // Required cost attribute 3198 3199 //----------Instruction Attributes--------------------------------------------- 3200 ins_attrib 
ins_cost(100); // Required cost attribute 3201 ins_attrib ins_size(8); // Required size attribute (in bits) 3202 ins_attrib ins_short_branch(0); // Required flag: is this instruction a 3203 // non-matching short branch variant of some 3204 // long branch? 3205 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2) 3206 // specifies the alignment that some part of the instruction (not 3207 // necessarily the start) requires. If > 1, a compute_padding() 3208 // function must be provided for the instruction 3209 3210 //----------OPERANDS----------------------------------------------------------- 3211 // Operand definitions must precede instruction definitions for correct parsing 3212 // in the ADLC because operands constitute user defined types which are used in 3213 // instruction definitions. 3214 3215 //----------Simple Operands---------------------------------------------------- 3216 // Immediate Operands 3217 // Integer Immediate 3218 operand immI() %{ 3219 match(ConI); 3220 3221 op_cost(10); 3222 format %{ %} 3223 interface(CONST_INTER); 3224 %} 3225 3226 // Constant for test vs zero 3227 operand immI_0() %{ 3228 predicate(n->get_int() == 0); 3229 match(ConI); 3230 3231 op_cost(0); 3232 format %{ %} 3233 interface(CONST_INTER); 3234 %} 3235 3236 // Constant for increment 3237 operand immI_1() %{ 3238 predicate(n->get_int() == 1); 3239 match(ConI); 3240 3241 op_cost(0); 3242 format %{ %} 3243 interface(CONST_INTER); 3244 %} 3245 3246 // Constant for decrement 3247 operand immI_M1() %{ 3248 predicate(n->get_int() == -1); 3249 match(ConI); 3250 3251 op_cost(0); 3252 format %{ %} 3253 interface(CONST_INTER); 3254 %} 3255 3256 // Valid scale values for addressing modes 3257 operand immI2() %{ 3258 predicate(0 <= n->get_int() && (n->get_int() <= 3)); 3259 match(ConI); 3260 3261 format %{ %} 3262 interface(CONST_INTER); 3263 %} 3264 3265 operand immI8() %{ 3266 predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); 3267 match(ConI); 
// (tail of operand immI8: signed 8-bit integer immediate)
  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned 8-bit integer immediate (0..255).
operand immU8() %{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Signed 16-bit integer immediate.
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer immediate in [1,31].
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer immediate in [32,63]; used as cnt-32 on the high word of a
// long (see multiply_con_and_shift_high above).
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// The constant 2.
operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The constant 3.
operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The constant 4.
operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// The constant 8.
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
3375 interface(CONST_INTER); 3376 %} 3377 3378 // NULL Pointer Immediate 3379 operand immP0() %{ 3380 predicate( n->get_ptr() == 0 ); 3381 match(ConP); 3382 op_cost(0); 3383 3384 format %{ %} 3385 interface(CONST_INTER); 3386 %} 3387 3388 // Long Immediate 3389 operand immL() %{ 3390 match(ConL); 3391 3392 op_cost(20); 3393 format %{ %} 3394 interface(CONST_INTER); 3395 %} 3396 3397 // Long Immediate zero 3398 operand immL0() %{ 3399 predicate( n->get_long() == 0L ); 3400 match(ConL); 3401 op_cost(0); 3402 3403 format %{ %} 3404 interface(CONST_INTER); 3405 %} 3406 3407 // Long Immediate zero 3408 operand immL_M1() %{ 3409 predicate( n->get_long() == -1L ); 3410 match(ConL); 3411 op_cost(0); 3412 3413 format %{ %} 3414 interface(CONST_INTER); 3415 %} 3416 3417 // Long immediate from 0 to 127. 3418 // Used for a shorter form of long mul by 10. 3419 operand immL_127() %{ 3420 predicate((0 <= n->get_long()) && (n->get_long() <= 127)); 3421 match(ConL); 3422 op_cost(0); 3423 3424 format %{ %} 3425 interface(CONST_INTER); 3426 %} 3427 3428 // Long Immediate: low 32-bit mask 3429 operand immL_32bits() %{ 3430 predicate(n->get_long() == 0xFFFFFFFFL); 3431 match(ConL); 3432 op_cost(0); 3433 3434 format %{ %} 3435 interface(CONST_INTER); 3436 %} 3437 3438 // Long Immediate: low 32-bit mask 3439 operand immL32() %{ 3440 predicate(n->get_long() == (int)(n->get_long())); 3441 match(ConL); 3442 op_cost(20); 3443 3444 format %{ %} 3445 interface(CONST_INTER); 3446 %} 3447 3448 //Double Immediate zero 3449 operand immDPR0() %{ 3450 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3451 // bug that generates code such that NaNs compare equal to 0.0 3452 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); 3453 match(ConD); 3454 3455 op_cost(5); 3456 format %{ %} 3457 interface(CONST_INTER); 3458 %} 3459 3460 // Double Immediate one 3461 operand immDPR1() %{ 3462 predicate( UseSSE<=1 && n->getd() == 1.0 ); 3463 match(ConD); 3464 3465 
op_cost(5); 3466 format %{ %} 3467 interface(CONST_INTER); 3468 %} 3469 3470 // Double Immediate 3471 operand immDPR() %{ 3472 predicate(UseSSE<=1); 3473 match(ConD); 3474 3475 op_cost(5); 3476 format %{ %} 3477 interface(CONST_INTER); 3478 %} 3479 3480 operand immD() %{ 3481 predicate(UseSSE>=2); 3482 match(ConD); 3483 3484 op_cost(5); 3485 format %{ %} 3486 interface(CONST_INTER); 3487 %} 3488 3489 // Double Immediate zero 3490 operand immD0() %{ 3491 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3492 // bug that generates code such that NaNs compare equal to 0.0 AND do not 3493 // compare equal to -0.0. 3494 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); 3495 match(ConD); 3496 3497 format %{ %} 3498 interface(CONST_INTER); 3499 %} 3500 3501 // Float Immediate zero 3502 operand immFPR0() %{ 3503 predicate(UseSSE == 0 && n->getf() == 0.0F); 3504 match(ConF); 3505 3506 op_cost(5); 3507 format %{ %} 3508 interface(CONST_INTER); 3509 %} 3510 3511 // Float Immediate one 3512 operand immFPR1() %{ 3513 predicate(UseSSE == 0 && n->getf() == 1.0F); 3514 match(ConF); 3515 3516 op_cost(5); 3517 format %{ %} 3518 interface(CONST_INTER); 3519 %} 3520 3521 // Float Immediate 3522 operand immFPR() %{ 3523 predicate( UseSSE == 0 ); 3524 match(ConF); 3525 3526 op_cost(5); 3527 format %{ %} 3528 interface(CONST_INTER); 3529 %} 3530 3531 // Float Immediate 3532 operand immF() %{ 3533 predicate(UseSSE >= 1); 3534 match(ConF); 3535 3536 op_cost(5); 3537 format %{ %} 3538 interface(CONST_INTER); 3539 %} 3540 3541 // Float Immediate zero. 
Zero and not -0.0 3542 operand immF0() %{ 3543 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); 3544 match(ConF); 3545 3546 op_cost(5); 3547 format %{ %} 3548 interface(CONST_INTER); 3549 %} 3550 3551 // Immediates for special shifts (sign extend) 3552 3553 // Constants for increment 3554 operand immI_16() %{ 3555 predicate( n->get_int() == 16 ); 3556 match(ConI); 3557 3558 format %{ %} 3559 interface(CONST_INTER); 3560 %} 3561 3562 operand immI_24() %{ 3563 predicate( n->get_int() == 24 ); 3564 match(ConI); 3565 3566 format %{ %} 3567 interface(CONST_INTER); 3568 %} 3569 3570 // Constant for byte-wide masking 3571 operand immI_255() %{ 3572 predicate( n->get_int() == 255 ); 3573 match(ConI); 3574 3575 format %{ %} 3576 interface(CONST_INTER); 3577 %} 3578 3579 // Constant for short-wide masking 3580 operand immI_65535() %{ 3581 predicate(n->get_int() == 65535); 3582 match(ConI); 3583 3584 format %{ %} 3585 interface(CONST_INTER); 3586 %} 3587 3588 operand kReg() 3589 %{ 3590 constraint(ALLOC_IN_RC(vectmask_reg)); 3591 match(RegVectMask); 3592 format %{%} 3593 interface(REG_INTER); 3594 %} 3595 3596 operand kReg_K1() 3597 %{ 3598 constraint(ALLOC_IN_RC(vectmask_reg_K1)); 3599 match(RegVectMask); 3600 format %{%} 3601 interface(REG_INTER); 3602 %} 3603 3604 operand kReg_K2() 3605 %{ 3606 constraint(ALLOC_IN_RC(vectmask_reg_K2)); 3607 match(RegVectMask); 3608 format %{%} 3609 interface(REG_INTER); 3610 %} 3611 3612 // Special Registers 3613 operand kReg_K3() 3614 %{ 3615 constraint(ALLOC_IN_RC(vectmask_reg_K3)); 3616 match(RegVectMask); 3617 format %{%} 3618 interface(REG_INTER); 3619 %} 3620 3621 operand kReg_K4() 3622 %{ 3623 constraint(ALLOC_IN_RC(vectmask_reg_K4)); 3624 match(RegVectMask); 3625 format %{%} 3626 interface(REG_INTER); 3627 %} 3628 3629 operand kReg_K5() 3630 %{ 3631 constraint(ALLOC_IN_RC(vectmask_reg_K5)); 3632 match(RegVectMask); 3633 format %{%} 3634 interface(REG_INTER); 3635 %} 3636 3637 operand kReg_K6() 3638 %{ 3639 
constraint(ALLOC_IN_RC(vectmask_reg_K6)); 3640 match(RegVectMask); 3641 format %{%} 3642 interface(REG_INTER); 3643 %} 3644 3645 // Special Registers 3646 operand kReg_K7() 3647 %{ 3648 constraint(ALLOC_IN_RC(vectmask_reg_K7)); 3649 match(RegVectMask); 3650 format %{%} 3651 interface(REG_INTER); 3652 %} 3653 3654 // Register Operands 3655 // Integer Register 3656 operand rRegI() %{ 3657 constraint(ALLOC_IN_RC(int_reg)); 3658 match(RegI); 3659 match(xRegI); 3660 match(eAXRegI); 3661 match(eBXRegI); 3662 match(eCXRegI); 3663 match(eDXRegI); 3664 match(eDIRegI); 3665 match(eSIRegI); 3666 3667 format %{ %} 3668 interface(REG_INTER); 3669 %} 3670 3671 // Subset of Integer Register 3672 operand xRegI(rRegI reg) %{ 3673 constraint(ALLOC_IN_RC(int_x_reg)); 3674 match(reg); 3675 match(eAXRegI); 3676 match(eBXRegI); 3677 match(eCXRegI); 3678 match(eDXRegI); 3679 3680 format %{ %} 3681 interface(REG_INTER); 3682 %} 3683 3684 // Special Registers 3685 operand eAXRegI(xRegI reg) %{ 3686 constraint(ALLOC_IN_RC(eax_reg)); 3687 match(reg); 3688 match(rRegI); 3689 3690 format %{ "EAX" %} 3691 interface(REG_INTER); 3692 %} 3693 3694 // Special Registers 3695 operand eBXRegI(xRegI reg) %{ 3696 constraint(ALLOC_IN_RC(ebx_reg)); 3697 match(reg); 3698 match(rRegI); 3699 3700 format %{ "EBX" %} 3701 interface(REG_INTER); 3702 %} 3703 3704 operand eCXRegI(xRegI reg) %{ 3705 constraint(ALLOC_IN_RC(ecx_reg)); 3706 match(reg); 3707 match(rRegI); 3708 3709 format %{ "ECX" %} 3710 interface(REG_INTER); 3711 %} 3712 3713 operand eDXRegI(xRegI reg) %{ 3714 constraint(ALLOC_IN_RC(edx_reg)); 3715 match(reg); 3716 match(rRegI); 3717 3718 format %{ "EDX" %} 3719 interface(REG_INTER); 3720 %} 3721 3722 operand eDIRegI(xRegI reg) %{ 3723 constraint(ALLOC_IN_RC(edi_reg)); 3724 match(reg); 3725 match(rRegI); 3726 3727 format %{ "EDI" %} 3728 interface(REG_INTER); 3729 %} 3730 3731 operand naxRegI() %{ 3732 constraint(ALLOC_IN_RC(nax_reg)); 3733 match(RegI); 3734 match(eCXRegI); 3735 match(eDXRegI); 3736 
match(eSIRegI); 3737 match(eDIRegI); 3738 3739 format %{ %} 3740 interface(REG_INTER); 3741 %} 3742 3743 operand nadxRegI() %{ 3744 constraint(ALLOC_IN_RC(nadx_reg)); 3745 match(RegI); 3746 match(eBXRegI); 3747 match(eCXRegI); 3748 match(eSIRegI); 3749 match(eDIRegI); 3750 3751 format %{ %} 3752 interface(REG_INTER); 3753 %} 3754 3755 operand ncxRegI() %{ 3756 constraint(ALLOC_IN_RC(ncx_reg)); 3757 match(RegI); 3758 match(eAXRegI); 3759 match(eDXRegI); 3760 match(eSIRegI); 3761 match(eDIRegI); 3762 3763 format %{ %} 3764 interface(REG_INTER); 3765 %} 3766 3767 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3768 // // 3769 operand eSIRegI(xRegI reg) %{ 3770 constraint(ALLOC_IN_RC(esi_reg)); 3771 match(reg); 3772 match(rRegI); 3773 3774 format %{ "ESI" %} 3775 interface(REG_INTER); 3776 %} 3777 3778 // Pointer Register 3779 operand anyRegP() %{ 3780 constraint(ALLOC_IN_RC(any_reg)); 3781 match(RegP); 3782 match(eAXRegP); 3783 match(eBXRegP); 3784 match(eCXRegP); 3785 match(eDIRegP); 3786 match(eRegP); 3787 3788 format %{ %} 3789 interface(REG_INTER); 3790 %} 3791 3792 operand eRegP() %{ 3793 constraint(ALLOC_IN_RC(int_reg)); 3794 match(RegP); 3795 match(eAXRegP); 3796 match(eBXRegP); 3797 match(eCXRegP); 3798 match(eDIRegP); 3799 3800 format %{ %} 3801 interface(REG_INTER); 3802 %} 3803 3804 operand rRegP() %{ 3805 constraint(ALLOC_IN_RC(int_reg)); 3806 match(RegP); 3807 match(eAXRegP); 3808 match(eBXRegP); 3809 match(eCXRegP); 3810 match(eDIRegP); 3811 3812 format %{ %} 3813 interface(REG_INTER); 3814 %} 3815 3816 // On windows95, EBP is not safe to use for implicit null tests. 
// Pointer register excluding EBP (not safe for implicit null tests — see above)
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register in the nax_reg register class
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register in the nabx_reg register class
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register in the p_reg register class
operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Long value held in a register pair
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

operand eBDPRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebpd_reg));
  match(reg);

  format %{ "EBP:EDI" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand rFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}

operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}

operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}

operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}

operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands (x87 stack, UseSSE < 2)
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

operand legRegF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand vlRegF() %{
  constraint(ALLOC_IN_RC(float_reg_vl));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand legRegD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

operand vlRegD() %{
  constraint(ALLOC_IN_RC(double_reg_vl));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
// (previous comment claimed "Plus Offset", but disp is fixed at 0x0)
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" with out a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use it's address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.
// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o");     // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.
// Pipeline block opens here; it continues past this section with the
// pipe_class definitions below.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable-sized
                                     // (previous comment said "Fixed size" — copy/paste error)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
// Or: _mem if it requires the big decoder and a memory unit.
// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
  single_instruction;
  dst    : S4(write);
  dst    : S3(read);
  DECODE : S0;        // any decoder
  ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
  instruction_count(2);
  dst    : S4(write);
  dst    : S3(read);
  DECODE : S0(2);     // any 2 decoders
  ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
  single_instruction;
  dst    : S4(write);
  dst    : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
  instruction_count(2);
  dst    : S4(write);
  dst    : S3(read);
  D0     : S0(2);     // big decoder only; twice
  ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  DECODE : S0;        // any decoder
  ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
  instruction_count(2);
  dst    : S4(write);
  src    : S3(read);
  DECODE : S0(2);     // any 2 decoders
  ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation using big decoder
// NOTE(review): src is declared as a memory operand despite the reg-reg name — confirm
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation using big decoder
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
  instruction_count(2);
  dst    : S4(write);
  src    : S3(read);
  D0     : S0(2);     // big decoder only; twice
  ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
  single_instruction;
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S4;        // any alu
  MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
  instruction_count(2);
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0(2);     // big decoder only; twice
  ALU    : S4(2);     // any 2 alus
  MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
  single_instruction;
  mem    : S3(read);
  D0     : S0;        // big decoder only
  MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
  single_instruction;
  mem    : S3(read);
  src    : S5(read);
  D0     : S0;        // big decoder only
  ALU    : S4;        // any alu
  MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
  instruction_count(2);
  mem    : S3(read);
  src    : S5(read);
  D0     : S0(2);     // big decoder only; twice
  ALU    : S4(2);     // any 2 alus
  MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory
pipe_class ialu_mem_imm(memory mem) %{
  single_instruction;
  mem    : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S4;        // any alu
  MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  D0     : S0;        // Big decoder only
  ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
  single_instruction;
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0;        // big decoder only
  ALU0   : S4;        // ALU0 only
  MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation writing the flags register
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
  single_instruction;
  cr     : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  DECODE : S0;        // any decoder
  ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
  single_instruction;
  cr     : S4(write);
  src1   : S3(read);
  DECODE : S0;        // any decoder
  ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
  single_instruction;
  cr     : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S4;        // any alu
  MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
  instruction_count(4);
  y      : S4(read);
  q      : S3(read);
  p      : S3(read);
  DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0;        // any decoder
  MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0;        // any decoder
%}

// Float reg-reg operation
// (definition continues past the end of this section)
pipe_class fpu_reg(regDPR
dst) %{ 4821 instruction_count(2); 4822 dst : S3(read); 4823 DECODE : S0(2); // any 2 decoders 4824 FPU : S3; 4825 %} 4826 4827 // Float reg-reg operation 4828 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 4829 instruction_count(2); 4830 dst : S4(write); 4831 src : S3(read); 4832 DECODE : S0(2); // any 2 decoders 4833 FPU : S3; 4834 %} 4835 4836 // Float reg-reg operation 4837 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 4838 instruction_count(3); 4839 dst : S4(write); 4840 src1 : S3(read); 4841 src2 : S3(read); 4842 DECODE : S0(3); // any 3 decoders 4843 FPU : S3(2); 4844 %} 4845 4846 // Float reg-reg operation 4847 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 4848 instruction_count(4); 4849 dst : S4(write); 4850 src1 : S3(read); 4851 src2 : S3(read); 4852 src3 : S3(read); 4853 DECODE : S0(4); // any 4 decoders 4854 FPU : S3(2); 4855 %} 4856 4857 // Float reg-reg operation 4858 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 4859 instruction_count(4); 4860 dst : S4(write); 4861 src1 : S3(read); 4862 src2 : S3(read); 4863 src3 : S3(read); 4864 DECODE : S1(3); // any 3 decoders 4865 D0 : S0; // Big decoder only 4866 FPU : S3(2); 4867 MEM : S3; 4868 %} 4869 4870 // Float reg-mem operation 4871 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 4872 instruction_count(2); 4873 dst : S5(write); 4874 mem : S3(read); 4875 D0 : S0; // big decoder only 4876 DECODE : S1; // any decoder for FPU POP 4877 FPU : S4; 4878 MEM : S3; // any mem 4879 %} 4880 4881 // Float reg-mem operation 4882 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 4883 instruction_count(3); 4884 dst : S5(write); 4885 src1 : S3(read); 4886 mem : S3(read); 4887 D0 : S0; // big decoder only 4888 DECODE : S1(2); // any decoder for FPU POP 4889 FPU : S4; 4890 MEM : S3; // any mem 4891 %} 4892 4893 // Float mem-reg operation 4894 pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 4895 
instruction_count(2); 4896 src : S5(read); 4897 mem : S3(read); 4898 DECODE : S0; // any decoder for FPU PUSH 4899 D0 : S1; // big decoder only 4900 FPU : S4; 4901 MEM : S3; // any mem 4902 %} 4903 4904 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4905 instruction_count(3); 4906 src1 : S3(read); 4907 src2 : S3(read); 4908 mem : S3(read); 4909 DECODE : S0(2); // any decoder for FPU PUSH 4910 D0 : S1; // big decoder only 4911 FPU : S4; 4912 MEM : S3; // any mem 4913 %} 4914 4915 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4916 instruction_count(3); 4917 src1 : S3(read); 4918 src2 : S3(read); 4919 mem : S4(read); 4920 DECODE : S0; // any decoder for FPU PUSH 4921 D0 : S0(2); // big decoder only 4922 FPU : S4; 4923 MEM : S3(2); // any mem 4924 %} 4925 4926 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4927 instruction_count(2); 4928 src1 : S3(read); 4929 dst : S4(read); 4930 D0 : S0(2); // big decoder only 4931 MEM : S3(2); // any mem 4932 %} 4933 4934 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4935 instruction_count(3); 4936 src1 : S3(read); 4937 src2 : S3(read); 4938 dst : S4(read); 4939 D0 : S0(3); // big decoder only 4940 FPU : S4; 4941 MEM : S3(3); // any mem 4942 %} 4943 4944 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4945 instruction_count(3); 4946 src1 : S4(read); 4947 mem : S4(read); 4948 DECODE : S0; // any decoder for FPU PUSH 4949 D0 : S0(2); // big decoder only 4950 FPU : S4; 4951 MEM : S3(2); // any mem 4952 %} 4953 4954 // Float load constant 4955 pipe_class fpu_reg_con(regDPR dst) %{ 4956 instruction_count(2); 4957 dst : S5(write); 4958 D0 : S0; // big decoder only for the load 4959 DECODE : S1; // any decoder for FPU POP 4960 FPU : S4; 4961 MEM : S3; // any mem 4962 %} 4963 4964 // Float load constant 4965 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 4966 instruction_count(3); 4967 dst : S5(write); 4968 src : S3(read); 4969 D0 : S0; // big decoder only for 
the load 4970 DECODE : S1(2); // any decoder for FPU POP 4971 FPU : S4; 4972 MEM : S3; // any mem 4973 %} 4974 4975 // UnConditional branch 4976 pipe_class pipe_jmp( label labl ) %{ 4977 single_instruction; 4978 BR : S3; 4979 %} 4980 4981 // Conditional branch 4982 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 4983 single_instruction; 4984 cr : S1(read); 4985 BR : S3; 4986 %} 4987 4988 // Allocation idiom 4989 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 4990 instruction_count(1); force_serialization; 4991 fixed_latency(6); 4992 heap_ptr : S3(read); 4993 DECODE : S0(3); 4994 D0 : S2; 4995 MEM : S3; 4996 ALU : S3(2); 4997 dst : S5(write); 4998 BR : S5; 4999 %} 5000 5001 // Generic big/slow expanded idiom 5002 pipe_class pipe_slow( ) %{ 5003 instruction_count(10); multiple_bundles; force_serialization; 5004 fixed_latency(100); 5005 D0 : S0(2); 5006 MEM : S3(2); 5007 %} 5008 5009 // The real do-nothing guy 5010 pipe_class empty( ) %{ 5011 instruction_count(0); 5012 %} 5013 5014 // Define the class for the Nop node 5015 define %{ 5016 MachNop = empty; 5017 %} 5018 5019 %} 5020 5021 //----------INSTRUCTIONS------------------------------------------------------- 5022 // 5023 // match -- States which machine-independent subtree may be replaced 5024 // by this instruction. 5025 // ins_cost -- The estimated cost of this instruction is used by instruction 5026 // selection to identify a minimum cost tree of machine 5027 // instructions that matches a tree of machine-independent 5028 // instructions. 5029 // format -- A string providing the disassembly for this instruction. 5030 // The value of an instruction's operand may be inserted 5031 // by referring to it with a '$' prefix. 5032 // opcode -- Three instruction opcodes may be provided. These are referred 5033 // to within an encode class as $primary, $secondary, and $tertiary 5034 // respectively. 
The primary opcode is commonly used to 5035 // indicate the type of machine instruction, while secondary 5036 // and tertiary are often used for prefix options or addressing 5037 // modes. 5038 // ins_encode -- A list of encode classes with parameters. The encode class 5039 // name must have been defined in an 'enc_class' specification 5040 // in the encode section of the architecture description. 5041 5042 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 5043 // Load Float 5044 instruct MoveF2LEG(legRegF dst, regF src) %{ 5045 match(Set dst src); 5046 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} 5047 ins_encode %{ 5048 ShouldNotReachHere(); 5049 %} 5050 ins_pipe( fpu_reg_reg ); 5051 %} 5052 5053 // Load Float 5054 instruct MoveLEG2F(regF dst, legRegF src) %{ 5055 match(Set dst src); 5056 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} 5057 ins_encode %{ 5058 ShouldNotReachHere(); 5059 %} 5060 ins_pipe( fpu_reg_reg ); 5061 %} 5062 5063 // Load Float 5064 instruct MoveF2VL(vlRegF dst, regF src) %{ 5065 match(Set dst src); 5066 format %{ "movss $dst,$src\t! load float (4 bytes)" %} 5067 ins_encode %{ 5068 ShouldNotReachHere(); 5069 %} 5070 ins_pipe( fpu_reg_reg ); 5071 %} 5072 5073 // Load Float 5074 instruct MoveVL2F(regF dst, vlRegF src) %{ 5075 match(Set dst src); 5076 format %{ "movss $dst,$src\t! 
load float (4 bytes)" %} 5077 ins_encode %{ 5078 ShouldNotReachHere(); 5079 %} 5080 ins_pipe( fpu_reg_reg ); 5081 %} 5082 5083 5084 5085 // Load Double 5086 instruct MoveD2LEG(legRegD dst, regD src) %{ 5087 match(Set dst src); 5088 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} 5089 ins_encode %{ 5090 ShouldNotReachHere(); 5091 %} 5092 ins_pipe( fpu_reg_reg ); 5093 %} 5094 5095 // Load Double 5096 instruct MoveLEG2D(regD dst, legRegD src) %{ 5097 match(Set dst src); 5098 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} 5099 ins_encode %{ 5100 ShouldNotReachHere(); 5101 %} 5102 ins_pipe( fpu_reg_reg ); 5103 %} 5104 5105 // Load Double 5106 instruct MoveD2VL(vlRegD dst, regD src) %{ 5107 match(Set dst src); 5108 format %{ "movsd $dst,$src\t! load double (8 bytes)" %} 5109 ins_encode %{ 5110 ShouldNotReachHere(); 5111 %} 5112 ins_pipe( fpu_reg_reg ); 5113 %} 5114 5115 // Load Double 5116 instruct MoveVL2D(regD dst, vlRegD src) %{ 5117 match(Set dst src); 5118 format %{ "movsd $dst,$src\t! 
load double (8 bytes)" %} 5119 ins_encode %{ 5120 ShouldNotReachHere(); 5121 %} 5122 ins_pipe( fpu_reg_reg ); 5123 %} 5124 5125 //----------BSWAP-Instruction-------------------------------------------------- 5126 instruct bytes_reverse_int(rRegI dst) %{ 5127 match(Set dst (ReverseBytesI dst)); 5128 5129 format %{ "BSWAP $dst" %} 5130 opcode(0x0F, 0xC8); 5131 ins_encode( OpcP, OpcSReg(dst) ); 5132 ins_pipe( ialu_reg ); 5133 %} 5134 5135 instruct bytes_reverse_long(eRegL dst) %{ 5136 match(Set dst (ReverseBytesL dst)); 5137 5138 format %{ "BSWAP $dst.lo\n\t" 5139 "BSWAP $dst.hi\n\t" 5140 "XCHG $dst.lo $dst.hi" %} 5141 5142 ins_cost(125); 5143 ins_encode( bswap_long_bytes(dst) ); 5144 ins_pipe( ialu_reg_reg); 5145 %} 5146 5147 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5148 match(Set dst (ReverseBytesUS dst)); 5149 effect(KILL cr); 5150 5151 format %{ "BSWAP $dst\n\t" 5152 "SHR $dst,16\n\t" %} 5153 ins_encode %{ 5154 __ bswapl($dst$$Register); 5155 __ shrl($dst$$Register, 16); 5156 %} 5157 ins_pipe( ialu_reg ); 5158 %} 5159 5160 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5161 match(Set dst (ReverseBytesS dst)); 5162 effect(KILL cr); 5163 5164 format %{ "BSWAP $dst\n\t" 5165 "SAR $dst,16\n\t" %} 5166 ins_encode %{ 5167 __ bswapl($dst$$Register); 5168 __ sarl($dst$$Register, 16); 5169 %} 5170 ins_pipe( ialu_reg ); 5171 %} 5172 5173 5174 //---------- Zeros Count Instructions ------------------------------------------ 5175 5176 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5177 predicate(UseCountLeadingZerosInstruction); 5178 match(Set dst (CountLeadingZerosI src)); 5179 effect(KILL cr); 5180 5181 format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %} 5182 ins_encode %{ 5183 __ lzcntl($dst$$Register, $src$$Register); 5184 %} 5185 ins_pipe(ialu_reg); 5186 %} 5187 5188 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5189 predicate(!UseCountLeadingZerosInstruction); 5190 match(Set dst 
(CountLeadingZerosI src)); 5191 effect(KILL cr); 5192 5193 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5194 "JNZ skip\n\t" 5195 "MOV $dst, -1\n" 5196 "skip:\n\t" 5197 "NEG $dst\n\t" 5198 "ADD $dst, 31" %} 5199 ins_encode %{ 5200 Register Rdst = $dst$$Register; 5201 Register Rsrc = $src$$Register; 5202 Label skip; 5203 __ bsrl(Rdst, Rsrc); 5204 __ jccb(Assembler::notZero, skip); 5205 __ movl(Rdst, -1); 5206 __ bind(skip); 5207 __ negl(Rdst); 5208 __ addl(Rdst, BitsPerInt - 1); 5209 %} 5210 ins_pipe(ialu_reg); 5211 %} 5212 5213 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5214 predicate(UseCountLeadingZerosInstruction); 5215 match(Set dst (CountLeadingZerosL src)); 5216 effect(TEMP dst, KILL cr); 5217 5218 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5219 "JNC done\n\t" 5220 "LZCNT $dst, $src.lo\n\t" 5221 "ADD $dst, 32\n" 5222 "done:" %} 5223 ins_encode %{ 5224 Register Rdst = $dst$$Register; 5225 Register Rsrc = $src$$Register; 5226 Label done; 5227 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5228 __ jccb(Assembler::carryClear, done); 5229 __ lzcntl(Rdst, Rsrc); 5230 __ addl(Rdst, BitsPerInt); 5231 __ bind(done); 5232 %} 5233 ins_pipe(ialu_reg); 5234 %} 5235 5236 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5237 predicate(!UseCountLeadingZerosInstruction); 5238 match(Set dst (CountLeadingZerosL src)); 5239 effect(TEMP dst, KILL cr); 5240 5241 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5242 "JZ msw_is_zero\n\t" 5243 "ADD $dst, 32\n\t" 5244 "JMP not_zero\n" 5245 "msw_is_zero:\n\t" 5246 "BSR $dst, $src.lo\n\t" 5247 "JNZ not_zero\n\t" 5248 "MOV $dst, -1\n" 5249 "not_zero:\n\t" 5250 "NEG $dst\n\t" 5251 "ADD $dst, 63\n" %} 5252 ins_encode %{ 5253 Register Rdst = $dst$$Register; 5254 Register Rsrc = $src$$Register; 5255 Label msw_is_zero; 5256 Label not_zero; 5257 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5258 __ jccb(Assembler::zero, msw_is_zero); 5259 __ addl(Rdst, BitsPerInt); 
5260 __ jmpb(not_zero); 5261 __ bind(msw_is_zero); 5262 __ bsrl(Rdst, Rsrc); 5263 __ jccb(Assembler::notZero, not_zero); 5264 __ movl(Rdst, -1); 5265 __ bind(not_zero); 5266 __ negl(Rdst); 5267 __ addl(Rdst, BitsPerLong - 1); 5268 %} 5269 ins_pipe(ialu_reg); 5270 %} 5271 5272 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5273 predicate(UseCountTrailingZerosInstruction); 5274 match(Set dst (CountTrailingZerosI src)); 5275 effect(KILL cr); 5276 5277 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5278 ins_encode %{ 5279 __ tzcntl($dst$$Register, $src$$Register); 5280 %} 5281 ins_pipe(ialu_reg); 5282 %} 5283 5284 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5285 predicate(!UseCountTrailingZerosInstruction); 5286 match(Set dst (CountTrailingZerosI src)); 5287 effect(KILL cr); 5288 5289 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5290 "JNZ done\n\t" 5291 "MOV $dst, 32\n" 5292 "done:" %} 5293 ins_encode %{ 5294 Register Rdst = $dst$$Register; 5295 Label done; 5296 __ bsfl(Rdst, $src$$Register); 5297 __ jccb(Assembler::notZero, done); 5298 __ movl(Rdst, BitsPerInt); 5299 __ bind(done); 5300 %} 5301 ins_pipe(ialu_reg); 5302 %} 5303 5304 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5305 predicate(UseCountTrailingZerosInstruction); 5306 match(Set dst (CountTrailingZerosL src)); 5307 effect(TEMP dst, KILL cr); 5308 5309 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5310 "JNC done\n\t" 5311 "TZCNT $dst, $src.hi\n\t" 5312 "ADD $dst, 32\n" 5313 "done:" %} 5314 ins_encode %{ 5315 Register Rdst = $dst$$Register; 5316 Register Rsrc = $src$$Register; 5317 Label done; 5318 __ tzcntl(Rdst, Rsrc); 5319 __ jccb(Assembler::carryClear, done); 5320 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5321 __ addl(Rdst, BitsPerInt); 5322 __ bind(done); 5323 %} 5324 ins_pipe(ialu_reg); 5325 %} 5326 5327 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5328 
predicate(!UseCountTrailingZerosInstruction); 5329 match(Set dst (CountTrailingZerosL src)); 5330 effect(TEMP dst, KILL cr); 5331 5332 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5333 "JNZ done\n\t" 5334 "BSF $dst, $src.hi\n\t" 5335 "JNZ msw_not_zero\n\t" 5336 "MOV $dst, 32\n" 5337 "msw_not_zero:\n\t" 5338 "ADD $dst, 32\n" 5339 "done:" %} 5340 ins_encode %{ 5341 Register Rdst = $dst$$Register; 5342 Register Rsrc = $src$$Register; 5343 Label msw_not_zero; 5344 Label done; 5345 __ bsfl(Rdst, Rsrc); 5346 __ jccb(Assembler::notZero, done); 5347 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5348 __ jccb(Assembler::notZero, msw_not_zero); 5349 __ movl(Rdst, BitsPerInt); 5350 __ bind(msw_not_zero); 5351 __ addl(Rdst, BitsPerInt); 5352 __ bind(done); 5353 %} 5354 ins_pipe(ialu_reg); 5355 %} 5356 5357 5358 //---------- Population Count Instructions ------------------------------------- 5359 5360 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5361 predicate(UsePopCountInstruction); 5362 match(Set dst (PopCountI src)); 5363 effect(KILL cr); 5364 5365 format %{ "POPCNT $dst, $src" %} 5366 ins_encode %{ 5367 __ popcntl($dst$$Register, $src$$Register); 5368 %} 5369 ins_pipe(ialu_reg); 5370 %} 5371 5372 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5373 predicate(UsePopCountInstruction); 5374 match(Set dst (PopCountI (LoadI mem))); 5375 effect(KILL cr); 5376 5377 format %{ "POPCNT $dst, $mem" %} 5378 ins_encode %{ 5379 __ popcntl($dst$$Register, $mem$$Address); 5380 %} 5381 ins_pipe(ialu_reg); 5382 %} 5383 5384 // Note: Long.bitCount(long) returns an int. 
5385 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 5386 predicate(UsePopCountInstruction); 5387 match(Set dst (PopCountL src)); 5388 effect(KILL cr, TEMP tmp, TEMP dst); 5389 5390 format %{ "POPCNT $dst, $src.lo\n\t" 5391 "POPCNT $tmp, $src.hi\n\t" 5392 "ADD $dst, $tmp" %} 5393 ins_encode %{ 5394 __ popcntl($dst$$Register, $src$$Register); 5395 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 5396 __ addl($dst$$Register, $tmp$$Register); 5397 %} 5398 ins_pipe(ialu_reg); 5399 %} 5400 5401 // Note: Long.bitCount(long) returns an int. 5402 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ 5403 predicate(UsePopCountInstruction); 5404 match(Set dst (PopCountL (LoadL mem))); 5405 effect(KILL cr, TEMP tmp, TEMP dst); 5406 5407 format %{ "POPCNT $dst, $mem\n\t" 5408 "POPCNT $tmp, $mem+4\n\t" 5409 "ADD $dst, $tmp" %} 5410 ins_encode %{ 5411 //__ popcntl($dst$$Register, $mem$$Address$$first); 5412 //__ popcntl($tmp$$Register, $mem$$Address$$second); 5413 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); 5414 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); 5415 __ addl($dst$$Register, $tmp$$Register); 5416 %} 5417 ins_pipe(ialu_reg); 5418 %} 5419 5420 5421 //----------Load/Store/Move Instructions--------------------------------------- 5422 //----------Load Instructions-------------------------------------------------- 5423 // Load Byte (8bit signed) 5424 instruct loadB(xRegI dst, memory mem) %{ 5425 match(Set dst (LoadB mem)); 5426 5427 ins_cost(125); 5428 format %{ "MOVSX8 $dst,$mem\t# byte" %} 5429 5430 ins_encode %{ 5431 __ movsbl($dst$$Register, $mem$$Address); 5432 %} 5433 5434 ins_pipe(ialu_reg_mem); 5435 %} 5436 5437 // Load Byte (8bit signed) into Long Register 5438 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5439 match(Set dst (ConvI2L (LoadB mem))); 5440 effect(KILL 
cr); 5441 5442 ins_cost(375); 5443 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" 5444 "MOV $dst.hi,$dst.lo\n\t" 5445 "SAR $dst.hi,7" %} 5446 5447 ins_encode %{ 5448 __ movsbl($dst$$Register, $mem$$Address); 5449 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5450 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. 5451 %} 5452 5453 ins_pipe(ialu_reg_mem); 5454 %} 5455 5456 // Load Unsigned Byte (8bit UNsigned) 5457 instruct loadUB(xRegI dst, memory mem) %{ 5458 match(Set dst (LoadUB mem)); 5459 5460 ins_cost(125); 5461 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} 5462 5463 ins_encode %{ 5464 __ movzbl($dst$$Register, $mem$$Address); 5465 %} 5466 5467 ins_pipe(ialu_reg_mem); 5468 %} 5469 5470 // Load Unsigned Byte (8 bit UNsigned) into Long Register 5471 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5472 match(Set dst (ConvI2L (LoadUB mem))); 5473 effect(KILL cr); 5474 5475 ins_cost(250); 5476 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" 5477 "XOR $dst.hi,$dst.hi" %} 5478 5479 ins_encode %{ 5480 Register Rdst = $dst$$Register; 5481 __ movzbl(Rdst, $mem$$Address); 5482 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5483 %} 5484 5485 ins_pipe(ialu_reg_mem); 5486 %} 5487 5488 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register 5489 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5490 match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); 5491 effect(KILL cr); 5492 5493 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" 5494 "XOR $dst.hi,$dst.hi\n\t" 5495 "AND $dst.lo,right_n_bits($mask, 8)" %} 5496 ins_encode %{ 5497 Register Rdst = $dst$$Register; 5498 __ movzbl(Rdst, $mem$$Address); 5499 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5500 __ andl(Rdst, $mask$$constant & right_n_bits(8)); 5501 %} 5502 ins_pipe(ialu_reg_mem); 5503 %} 5504 5505 // Load Short (16bit signed) 5506 instruct loadS(rRegI 
dst, memory mem) %{ 5507 match(Set dst (LoadS mem)); 5508 5509 ins_cost(125); 5510 format %{ "MOVSX $dst,$mem\t# short" %} 5511 5512 ins_encode %{ 5513 __ movswl($dst$$Register, $mem$$Address); 5514 %} 5515 5516 ins_pipe(ialu_reg_mem); 5517 %} 5518 5519 // Load Short (16 bit signed) to Byte (8 bit signed) 5520 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5521 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5522 5523 ins_cost(125); 5524 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5525 ins_encode %{ 5526 __ movsbl($dst$$Register, $mem$$Address); 5527 %} 5528 ins_pipe(ialu_reg_mem); 5529 %} 5530 5531 // Load Short (16bit signed) into Long Register 5532 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5533 match(Set dst (ConvI2L (LoadS mem))); 5534 effect(KILL cr); 5535 5536 ins_cost(375); 5537 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5538 "MOV $dst.hi,$dst.lo\n\t" 5539 "SAR $dst.hi,15" %} 5540 5541 ins_encode %{ 5542 __ movswl($dst$$Register, $mem$$Address); 5543 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5544 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 
5545 %} 5546 5547 ins_pipe(ialu_reg_mem); 5548 %} 5549 5550 // Load Unsigned Short/Char (16bit unsigned) 5551 instruct loadUS(rRegI dst, memory mem) %{ 5552 match(Set dst (LoadUS mem)); 5553 5554 ins_cost(125); 5555 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5556 5557 ins_encode %{ 5558 __ movzwl($dst$$Register, $mem$$Address); 5559 %} 5560 5561 ins_pipe(ialu_reg_mem); 5562 %} 5563 5564 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5565 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5566 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5567 5568 ins_cost(125); 5569 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5570 ins_encode %{ 5571 __ movsbl($dst$$Register, $mem$$Address); 5572 %} 5573 ins_pipe(ialu_reg_mem); 5574 %} 5575 5576 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5577 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5578 match(Set dst (ConvI2L (LoadUS mem))); 5579 effect(KILL cr); 5580 5581 ins_cost(250); 5582 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5583 "XOR $dst.hi,$dst.hi" %} 5584 5585 ins_encode %{ 5586 __ movzwl($dst$$Register, $mem$$Address); 5587 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5588 %} 5589 5590 ins_pipe(ialu_reg_mem); 5591 %} 5592 5593 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5594 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5595 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5596 effect(KILL cr); 5597 5598 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5599 "XOR $dst.hi,$dst.hi" %} 5600 ins_encode %{ 5601 Register Rdst = $dst$$Register; 5602 __ movzbl(Rdst, $mem$$Address); 5603 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5604 %} 5605 ins_pipe(ialu_reg_mem); 5606 %} 5607 5608 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5609 instruct 
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5610 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5611 effect(KILL cr); 5612 5613 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5614 "XOR $dst.hi,$dst.hi\n\t" 5615 "AND $dst.lo,right_n_bits($mask, 16)" %} 5616 ins_encode %{ 5617 Register Rdst = $dst$$Register; 5618 __ movzwl(Rdst, $mem$$Address); 5619 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5620 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5621 %} 5622 ins_pipe(ialu_reg_mem); 5623 %} 5624 5625 // Load Integer 5626 instruct loadI(rRegI dst, memory mem) %{ 5627 match(Set dst (LoadI mem)); 5628 5629 ins_cost(125); 5630 format %{ "MOV $dst,$mem\t# int" %} 5631 5632 ins_encode %{ 5633 __ movl($dst$$Register, $mem$$Address); 5634 %} 5635 5636 ins_pipe(ialu_reg_mem); 5637 %} 5638 5639 // Load Integer (32 bit signed) to Byte (8 bit signed) 5640 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5641 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5642 5643 ins_cost(125); 5644 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5645 ins_encode %{ 5646 __ movsbl($dst$$Register, $mem$$Address); 5647 %} 5648 ins_pipe(ialu_reg_mem); 5649 %} 5650 5651 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5652 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5653 match(Set dst (AndI (LoadI mem) mask)); 5654 5655 ins_cost(125); 5656 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5657 ins_encode %{ 5658 __ movzbl($dst$$Register, $mem$$Address); 5659 %} 5660 ins_pipe(ialu_reg_mem); 5661 %} 5662 5663 // Load Integer (32 bit signed) to Short (16 bit signed) 5664 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5665 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5666 5667 ins_cost(125); 5668 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5669 ins_encode %{ 5670 __ movswl($dst$$Register, $mem$$Address); 5671 %} 5672 ins_pipe(ialu_reg_mem); 5673 
%} 5674 5675 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5676 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5677 match(Set dst (AndI (LoadI mem) mask)); 5678 5679 ins_cost(125); 5680 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5681 ins_encode %{ 5682 __ movzwl($dst$$Register, $mem$$Address); 5683 %} 5684 ins_pipe(ialu_reg_mem); 5685 %} 5686 5687 // Load Integer into Long Register 5688 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5689 match(Set dst (ConvI2L (LoadI mem))); 5690 effect(KILL cr); 5691 5692 ins_cost(375); 5693 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5694 "MOV $dst.hi,$dst.lo\n\t" 5695 "SAR $dst.hi,31" %} 5696 5697 ins_encode %{ 5698 __ movl($dst$$Register, $mem$$Address); 5699 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5700 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5701 %} 5702 5703 ins_pipe(ialu_reg_mem); 5704 %} 5705 5706 // Load Integer with mask 0xFF into Long Register 5707 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5708 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5709 effect(KILL cr); 5710 5711 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5712 "XOR $dst.hi,$dst.hi" %} 5713 ins_encode %{ 5714 Register Rdst = $dst$$Register; 5715 __ movzbl(Rdst, $mem$$Address); 5716 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5717 %} 5718 ins_pipe(ialu_reg_mem); 5719 %} 5720 5721 // Load Integer with mask 0xFFFF into Long Register 5722 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5723 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5724 effect(KILL cr); 5725 5726 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5727 "XOR $dst.hi,$dst.hi" %} 5728 ins_encode %{ 5729 Register Rdst = $dst$$Register; 5730 __ movzwl(Rdst, $mem$$Address); 5731 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5732 %} 5733 ins_pipe(ialu_reg_mem); 
5734 %} 5735 5736 // Load Integer with 31-bit mask into Long Register 5737 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{ 5738 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5739 effect(KILL cr); 5740 5741 format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t" 5742 "XOR $dst.hi,$dst.hi\n\t" 5743 "AND $dst.lo,$mask" %} 5744 ins_encode %{ 5745 Register Rdst = $dst$$Register; 5746 __ movl(Rdst, $mem$$Address); 5747 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5748 __ andl(Rdst, $mask$$constant); 5749 %} 5750 ins_pipe(ialu_reg_mem); 5751 %} 5752 5753 // Load Unsigned Integer into Long Register 5754 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{ 5755 match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); 5756 effect(KILL cr); 5757 5758 ins_cost(250); 5759 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" 5760 "XOR $dst.hi,$dst.hi" %} 5761 5762 ins_encode %{ 5763 __ movl($dst$$Register, $mem$$Address); 5764 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5765 %} 5766 5767 ins_pipe(ialu_reg_mem); 5768 %} 5769 5770 // Load Long. Cannot clobber address while loading, so restrict address 5771 // register to ESI 5772 instruct loadL(eRegL dst, load_long_memory mem) %{ 5773 predicate(!((LoadLNode*)n)->require_atomic_access()); 5774 match(Set dst (LoadL mem)); 5775 5776 ins_cost(250); 5777 format %{ "MOV $dst.lo,$mem\t# long\n\t" 5778 "MOV $dst.hi,$mem+4" %} 5779 5780 ins_encode %{ 5781 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); 5782 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); 5783 __ movl($dst$$Register, Amemlo); 5784 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); 5785 %} 5786 5787 ins_pipe(ialu_reg_long_mem); 5788 %} 5789 5790 // Volatile Load Long. Must be atomic, so do 64-bit FILD 5791 // then store it down to the stack and reload on the int 5792 // side. 
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe(fpu_reg_mem);
%}

instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    // 64-bit SSE load is atomic; bounce through XMM to the stack slot.
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    // Atomic 64-bit load into XMM, then split into the two GPR halves.
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode(OpcP, RegMem(dst,mem));
  ins_pipe(ialu_reg_mem);
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode(OpcP, RegMem(dst,mem));
  ins_pipe(ialu_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode(OpcP, RegMem(dst,mem));
  ins_pipe(ialu_reg_mem);
%}

// Load Double
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode(OpcP, RMopc_Mem(0x00,mem),
             Pop_Reg_DPR(dst));
  ins_pipe(fpu_reg_mem);
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Load Float
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00,mem),
             Pop_Reg_FPR(dst));
  ins_pipe(fpu_reg_mem);
%}

// Load Effective Address
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode(OpcP, RegMem(dst,mem));
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode(OpcP, RegMem(dst,mem));
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode(OpcP, RegMem(dst,mem));
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode(OpcP, RegMem(dst,mem));
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode(OpcP, RegMem(dst,mem));
  ins_pipe(ialu_reg_reg_fat);
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode(LdImmI(dst, src));
  ins_pipe(ialu_reg_fat);
%}

// Load Constant zero
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);               /* + rd */
  ins_encode(OpcP, RegReg(dst, dst));
  ins_pipe(ialu_reg);
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);               /* + rd */
  ins_encode(LdImmP(dst, src));
  ins_pipe(ialu_reg_fat);
%}

instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode(LdImmL_Lo(dst, src), LdImmL_Hi(dst, src));
  ins_pipe(ialu_reg_long_fat);
%}

instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode(RegReg_Lo(dst,dst), RegReg_Hi(dst, dst));
  ins_pipe(ialu_reg_long);
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    // XOR-with-self is the cheapest way to materialize +0.0f.
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode(OpcP, RegMem(dst,src));
  ins_pipe(ialu_reg_mem);
%}

instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode(OpcP, RegMem(dst, src), OpcS, RegMem_Hi(dst, src));
  ins_pipe(ialu_mem_long_reg);
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode(OpcP, RegMem(dst,src));
  ins_pipe(ialu_reg_mem);
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode(OpcP, RMopc_Mem_no_oop(0x00,src),
             Pop_Reg_FPR(dst));
  ins_pipe(fpu_reg_mem);
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode(OpcP, RMopc_Mem_no_oop(0x00,src),
             Pop_Reg_DPR(dst));
  ins_pipe(fpu_reg_mem);
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode(OpcP, RegMem(src, mem));
  ins_pipe(ialu_mem_reg);
%}

// Store Char/Short
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode(OpcS, OpcP, RegMem(src, mem));
  ins_pipe(ialu_mem_reg);
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode(OpcP, RegMem(src, mem));
  ins_pipe(ialu_mem_reg);
%}

// Store Long
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode(OpcP, RegMem(src, mem), OpcS, RegMem_Hi(src, mem));
  ins_pipe(ialu_mem_long_reg);
%}

// Store Long to Integer
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    // Truncating store: only the low half of the long is written.
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode(OpcP, RegMem(EAX, mem), enc_storeL_volatile(mem,src));
  ins_pipe(fpu_reg_mem);
%}

instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    // Assemble the two GPR halves into one XMM, then store atomically.
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode(OpcP, RegMem(src, mem));
  ins_pipe(ialu_mem_reg);
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00,mem), Con32(src));
  ins_pipe(ialu_mem_imm);
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);               /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode(SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16(src));
  ins_pipe(ialu_mem_imm);
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00,mem), Con32(src));
  ins_pipe(ialu_mem_imm);
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00,mem), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00,mem), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}

// Store Double
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode(enc_FPR_store(mem,src));
  ins_pipe(fpu_mem_reg);
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode(enc_FPR_store(mem,src));
  ins_pipe(fpu_mem_reg);
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}


// Store Float
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode(enc_FPR_store(mem,src));
  ins_pipe(fpu_mem_reg);
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode(enc_FPR_store(mem,src));
  ins_pipe(fpu_mem_reg);
%}

// Store Float does rounding on x86
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode(enc_FPR_store(mem,src));
  ins_pipe(fpu_mem_reg);
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits(src));
  ins_pipe(ialu_mem_imm);
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits(src));
  ins_pipe(ialu_mem_imm);
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode(OpcPRegSS(dst, src));
  ins_pipe(ialu_mem_reg);
%}

// Store Integer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode(OpcPRegSS(dst, src));
  ins_pipe(ialu_mem_reg);
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode(OpcP, RegMem(src, dst), OpcS, RegMem_Hi(src, dst));
  ins_pipe(ialu_mem_long_reg);
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode(enc_Copy(dst, src));
  ins_pipe(ialu_reg_reg);
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode(enc_cmov(cop), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode(enc_cmov(cop), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode(enc_cmov(cop), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode(enc_cmov(cop), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode(enc_cmov(cop), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move (non-P6 version)
// Note:  a CMoveP is generated for  stubs and native wrappers
//        regardless of whether we are on a P6, so we
//        emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode(enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode(enc_cmov(cop), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode(enc_cmov_dpr(cop,src));
  ins_pipe(pipe_cmovDPR_reg);
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode(enc_cmov_dpr(cop,src));
  ins_pipe(pipe_cmovDPR_reg);
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode(enc_cmov_branch(cop, 0x4), Push_Reg_DPR(src), OpcP, RegOpc(dst));
  ins_pipe(pipe_cmovDPR_reg);
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode(enc_cmov_branch(cop, 0x4), Push_Reg_FPR(src), OpcP, RegOpc(dst));
  ins_pipe(pipe_cmovDPR_reg);
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe(pipe_slow);
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe(pipe_slow);
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe(pipe_slow);
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe(pipe_slow);
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format
  %{ "CMOV$cop $dst.lo,$src.lo\n\t"
     "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-compare variant of the long CMove.
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Carry-flag variant expands to the unsigned form above.
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Add immediate to register (ADD r32, imm8/imm32).
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add +1 via the one-byte INC encoding (guarded by UseIncDec).
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: dst = src0 + src1 without touching flags.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer + immediate via LEA (flags preserved).
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add -1 via the one-byte DEC encoding (guarded by UseIncDec).
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Pointer + register add.
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer + immediate add.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add with a memory operand on the source side.
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write add directly in memory.
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Increment a memory word in place (FF /0).
instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// Decrement a memory word in place (FF /1).
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// CheckCastPP/CastPP/CastII/CastLL: matcher-level no-ops, zero code emitted.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castLL( eRegL dst ) %{
  match(Set dst (CastLL dst));
  format %{
    "#castLL of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// CastFF/CastDD no-ops for the SSE register classes.
instruct castFF( regF dst ) %{
  predicate(UseSSE >= 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castDD( regD dst ) %{
  predicate(UseSSE >= 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// Same no-ops for the x87 (pre-SSE) register classes.
instruct castFF_PR( regFPR dst ) %{
  predicate(UseSSE < 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castDD_PR( regDPR dst ) %{
  predicate(UseSSE < 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 8-byte CAS: CMPXCHG8B needs EDX:EAX (oldval), ECX:EBX (newval), ESI (ptr).
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS: oldval pinned in EAX as CMPXCHG requires.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr
                 (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte CAS (CMPXCHGB); oldval pinned in EAX.
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Short (16-bit) CAS (CMPXCHGW).
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int CAS.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndExchange variants: result is the value found at [$mem_ptr]
// (left in oldval's fixed register); no boolean is materialized.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddB whose result is unused: a plain locked ADD suffices (no XADD).
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDS [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDS [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI( memory mem, rRegI
                newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
// GetAndSet family: XCHG with a memory operand is implicitly locked.
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write subtract directly in memory.
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// 0 - dst matched to a single NEG.
instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src
                    imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit-representable long constant into EDX:EAX (low word only).
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
//  (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Only applies when the long constant actually fits in 32 bits.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// MulAddS2I expands to two multiplies and an add using the rules above.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
            mulI_eReg(src2, src3, cr);
            addI_eReg(dst, src2, cr); %}
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

instruct mulIS_eReg(eADXRegL dst,
                    immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  // Special-cases min_jint / -1, which would raise #DE under IDIV.
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
    "normal: CDQ\n\t"
            "IDIV $div\n\t"
    "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (DivL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
    "normal: CDQ\n\t"
            "IDIV $div\n\t"
    "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (ModL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
    "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
    "fast:\n\t"
            "DIV $tmp\n"
    "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrough for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
    "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
    "fast:\n\t"
            "DIV $tmp\n"
    "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ?
con : -con; 7953 Label Lfast, Lpos, Ldone; 7954 7955 __ movl($tmp$$Register, pcon); 7956 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7957 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7958 7959 __ movl($tmp2$$Register, $dst$$Register); // save 7960 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7961 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7962 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7963 7964 // Negative dividend. 7965 // convert value to positive to use unsigned division 7966 __ lneg($dst$$Register, $tmp2$$Register); 7967 __ divl($tmp$$Register); 7968 __ movl($dst$$Register, $tmp2$$Register); 7969 __ divl($tmp$$Register); 7970 // revert remainder back to negative 7971 __ negl(HIGH_FROM_LOW($dst$$Register)); 7972 __ jmpb(Ldone); 7973 7974 __ bind(Lpos); 7975 __ divl($tmp$$Register); 7976 __ movl($dst$$Register, $tmp2$$Register); 7977 7978 __ bind(Lfast); 7979 // fast path: src is positive 7980 __ divl($tmp$$Register); 7981 7982 __ bind(Ldone); 7983 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7984 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 7985 7986 %} 7987 ins_pipe( pipe_slow ); 7988 %} 7989 7990 // Integer Shift Instructions 7991 // Shift Left by one 7992 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 7993 match(Set dst (LShiftI dst shift)); 7994 effect(KILL cr); 7995 7996 size(2); 7997 format %{ "SHL $dst,$shift" %} 7998 opcode(0xD1, 0x4); /* D1 /4 */ 7999 ins_encode( OpcP, RegOpc( dst ) ); 8000 ins_pipe( ialu_reg ); 8001 %} 8002 8003 // Shift Left by 8-bit immediate 8004 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8005 match(Set dst (LShiftI dst shift)); 8006 effect(KILL cr); 8007 8008 size(3); 8009 format %{ "SHL $dst,$shift" %} 8010 opcode(0xC1, 0x4); /* C1 /4 ib */ 8011 ins_encode( RegOpcImm( dst, shift) ); 8012 ins_pipe( ialu_reg ); 8013 %} 8014 8015 // Shift Left by variable 8016 instruct 
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8017 match(Set dst (LShiftI dst shift)); 8018 effect(KILL cr); 8019 8020 size(2); 8021 format %{ "SHL $dst,$shift" %} 8022 opcode(0xD3, 0x4); /* D3 /4 */ 8023 ins_encode( OpcP, RegOpc( dst ) ); 8024 ins_pipe( ialu_reg_reg ); 8025 %} 8026 8027 // Arithmetic shift right by one 8028 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8029 match(Set dst (RShiftI dst shift)); 8030 effect(KILL cr); 8031 8032 size(2); 8033 format %{ "SAR $dst,$shift" %} 8034 opcode(0xD1, 0x7); /* D1 /7 */ 8035 ins_encode( OpcP, RegOpc( dst ) ); 8036 ins_pipe( ialu_reg ); 8037 %} 8038 8039 // Arithmetic shift right by one 8040 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{ 8041 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 8042 effect(KILL cr); 8043 format %{ "SAR $dst,$shift" %} 8044 opcode(0xD1, 0x7); /* D1 /7 */ 8045 ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 8046 ins_pipe( ialu_mem_imm ); 8047 %} 8048 8049 // Arithmetic Shift Right by 8-bit immediate 8050 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8051 match(Set dst (RShiftI dst shift)); 8052 effect(KILL cr); 8053 8054 size(3); 8055 format %{ "SAR $dst,$shift" %} 8056 opcode(0xC1, 0x7); /* C1 /7 ib */ 8057 ins_encode( RegOpcImm( dst, shift ) ); 8058 ins_pipe( ialu_mem_imm ); 8059 %} 8060 8061 // Arithmetic Shift Right by 8-bit immediate 8062 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 8063 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 8064 effect(KILL cr); 8065 8066 format %{ "SAR $dst,$shift" %} 8067 opcode(0xC1, 0x7); /* C1 /7 ib */ 8068 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 8069 ins_pipe( ialu_mem_imm ); 8070 %} 8071 8072 // Arithmetic Shift Right by variable 8073 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8074 match(Set dst (RShiftI dst shift)); 8075 effect(KILL cr); 8076 8077 size(2); 8078 format %{ "SAR $dst,$shift" %} 8079 
opcode(0xD3, 0x7); /* D3 /7 */ 8080 ins_encode( OpcP, RegOpc( dst ) ); 8081 ins_pipe( ialu_reg_reg ); 8082 %} 8083 8084 // Logical shift right by one 8085 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8086 match(Set dst (URShiftI dst shift)); 8087 effect(KILL cr); 8088 8089 size(2); 8090 format %{ "SHR $dst,$shift" %} 8091 opcode(0xD1, 0x5); /* D1 /5 */ 8092 ins_encode( OpcP, RegOpc( dst ) ); 8093 ins_pipe( ialu_reg ); 8094 %} 8095 8096 // Logical Shift Right by 8-bit immediate 8097 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8098 match(Set dst (URShiftI dst shift)); 8099 effect(KILL cr); 8100 8101 size(3); 8102 format %{ "SHR $dst,$shift" %} 8103 opcode(0xC1, 0x5); /* C1 /5 ib */ 8104 ins_encode( RegOpcImm( dst, shift) ); 8105 ins_pipe( ialu_reg ); 8106 %} 8107 8108 8109 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. 8110 // This idiom is used by the compiler for the i2b bytecode. 8111 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ 8112 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); 8113 8114 size(3); 8115 format %{ "MOVSX $dst,$src :8" %} 8116 ins_encode %{ 8117 __ movsbl($dst$$Register, $src$$Register); 8118 %} 8119 ins_pipe(ialu_reg_reg); 8120 %} 8121 8122 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. 8123 // This idiom is used by the compiler the i2s bytecode. 
8124 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{ 8125 match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); 8126 8127 size(3); 8128 format %{ "MOVSX $dst,$src :16" %} 8129 ins_encode %{ 8130 __ movswl($dst$$Register, $src$$Register); 8131 %} 8132 ins_pipe(ialu_reg_reg); 8133 %} 8134 8135 8136 // Logical Shift Right by variable 8137 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8138 match(Set dst (URShiftI dst shift)); 8139 effect(KILL cr); 8140 8141 size(2); 8142 format %{ "SHR $dst,$shift" %} 8143 opcode(0xD3, 0x5); /* D3 /5 */ 8144 ins_encode( OpcP, RegOpc( dst ) ); 8145 ins_pipe( ialu_reg_reg ); 8146 %} 8147 8148 8149 //----------Logical Instructions----------------------------------------------- 8150 //----------Integer Logical Instructions--------------------------------------- 8151 // And Instructions 8152 // And Register with Register 8153 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8154 match(Set dst (AndI dst src)); 8155 effect(KILL cr); 8156 8157 size(2); 8158 format %{ "AND $dst,$src" %} 8159 opcode(0x23); 8160 ins_encode( OpcP, RegReg( dst, src) ); 8161 ins_pipe( ialu_reg_reg ); 8162 %} 8163 8164 // And Register with Immediate 8165 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8166 match(Set dst (AndI dst src)); 8167 effect(KILL cr); 8168 8169 format %{ "AND $dst,$src" %} 8170 opcode(0x81,0x04); /* Opcode 81 /4 */ 8171 // ins_encode( RegImm( dst, src) ); 8172 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8173 ins_pipe( ialu_reg ); 8174 %} 8175 8176 // And Register with Memory 8177 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8178 match(Set dst (AndI dst (LoadI src))); 8179 effect(KILL cr); 8180 8181 ins_cost(150); 8182 format %{ "AND $dst,$src" %} 8183 opcode(0x23); 8184 ins_encode( OpcP, RegMem( dst, src) ); 8185 ins_pipe( ialu_reg_mem ); 8186 %} 8187 8188 // And Memory with Register 8189 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8190 match(Set dst 
(StoreI dst (AndI (LoadI dst) src))); 8191 effect(KILL cr); 8192 8193 ins_cost(150); 8194 format %{ "AND $dst,$src" %} 8195 opcode(0x21); /* Opcode 21 /r */ 8196 ins_encode( OpcP, RegMem( src, dst ) ); 8197 ins_pipe( ialu_mem_reg ); 8198 %} 8199 8200 // And Memory with Immediate 8201 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8202 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8203 effect(KILL cr); 8204 8205 ins_cost(125); 8206 format %{ "AND $dst,$src" %} 8207 opcode(0x81, 0x4); /* Opcode 81 /4 id */ 8208 // ins_encode( MemImm( dst, src) ); 8209 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8210 ins_pipe( ialu_mem_imm ); 8211 %} 8212 8213 // BMI1 instructions 8214 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ 8215 match(Set dst (AndI (XorI src1 minus_1) src2)); 8216 predicate(UseBMI1Instructions); 8217 effect(KILL cr); 8218 8219 format %{ "ANDNL $dst, $src1, $src2" %} 8220 8221 ins_encode %{ 8222 __ andnl($dst$$Register, $src1$$Register, $src2$$Register); 8223 %} 8224 ins_pipe(ialu_reg); 8225 %} 8226 8227 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ 8228 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) )); 8229 predicate(UseBMI1Instructions); 8230 effect(KILL cr); 8231 8232 ins_cost(125); 8233 format %{ "ANDNL $dst, $src1, $src2" %} 8234 8235 ins_encode %{ 8236 __ andnl($dst$$Register, $src1$$Register, $src2$$Address); 8237 %} 8238 ins_pipe(ialu_reg_mem); 8239 %} 8240 8241 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{ 8242 match(Set dst (AndI (SubI imm_zero src) src)); 8243 predicate(UseBMI1Instructions); 8244 effect(KILL cr); 8245 8246 format %{ "BLSIL $dst, $src" %} 8247 8248 ins_encode %{ 8249 __ blsil($dst$$Register, $src$$Register); 8250 %} 8251 ins_pipe(ialu_reg); 8252 %} 8253 8254 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{ 8255 
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); 8256 predicate(UseBMI1Instructions); 8257 effect(KILL cr); 8258 8259 ins_cost(125); 8260 format %{ "BLSIL $dst, $src" %} 8261 8262 ins_encode %{ 8263 __ blsil($dst$$Register, $src$$Address); 8264 %} 8265 ins_pipe(ialu_reg_mem); 8266 %} 8267 8268 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8269 %{ 8270 match(Set dst (XorI (AddI src minus_1) src)); 8271 predicate(UseBMI1Instructions); 8272 effect(KILL cr); 8273 8274 format %{ "BLSMSKL $dst, $src" %} 8275 8276 ins_encode %{ 8277 __ blsmskl($dst$$Register, $src$$Register); 8278 %} 8279 8280 ins_pipe(ialu_reg); 8281 %} 8282 8283 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8284 %{ 8285 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); 8286 predicate(UseBMI1Instructions); 8287 effect(KILL cr); 8288 8289 ins_cost(125); 8290 format %{ "BLSMSKL $dst, $src" %} 8291 8292 ins_encode %{ 8293 __ blsmskl($dst$$Register, $src$$Address); 8294 %} 8295 8296 ins_pipe(ialu_reg_mem); 8297 %} 8298 8299 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8300 %{ 8301 match(Set dst (AndI (AddI src minus_1) src) ); 8302 predicate(UseBMI1Instructions); 8303 effect(KILL cr); 8304 8305 format %{ "BLSRL $dst, $src" %} 8306 8307 ins_encode %{ 8308 __ blsrl($dst$$Register, $src$$Register); 8309 %} 8310 8311 ins_pipe(ialu_reg); 8312 %} 8313 8314 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8315 %{ 8316 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); 8317 predicate(UseBMI1Instructions); 8318 effect(KILL cr); 8319 8320 ins_cost(125); 8321 format %{ "BLSRL $dst, $src" %} 8322 8323 ins_encode %{ 8324 __ blsrl($dst$$Register, $src$$Address); 8325 %} 8326 8327 ins_pipe(ialu_reg_mem); 8328 %} 8329 8330 // Or Instructions 8331 // Or Register with Register 8332 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8333 match(Set dst 
(OrI dst src)); 8334 effect(KILL cr); 8335 8336 size(2); 8337 format %{ "OR $dst,$src" %} 8338 opcode(0x0B); 8339 ins_encode( OpcP, RegReg( dst, src) ); 8340 ins_pipe( ialu_reg_reg ); 8341 %} 8342 8343 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8344 match(Set dst (OrI dst (CastP2X src))); 8345 effect(KILL cr); 8346 8347 size(2); 8348 format %{ "OR $dst,$src" %} 8349 opcode(0x0B); 8350 ins_encode( OpcP, RegReg( dst, src) ); 8351 ins_pipe( ialu_reg_reg ); 8352 %} 8353 8354 8355 // Or Register with Immediate 8356 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8357 match(Set dst (OrI dst src)); 8358 effect(KILL cr); 8359 8360 format %{ "OR $dst,$src" %} 8361 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8362 // ins_encode( RegImm( dst, src) ); 8363 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8364 ins_pipe( ialu_reg ); 8365 %} 8366 8367 // Or Register with Memory 8368 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8369 match(Set dst (OrI dst (LoadI src))); 8370 effect(KILL cr); 8371 8372 ins_cost(150); 8373 format %{ "OR $dst,$src" %} 8374 opcode(0x0B); 8375 ins_encode( OpcP, RegMem( dst, src) ); 8376 ins_pipe( ialu_reg_mem ); 8377 %} 8378 8379 // Or Memory with Register 8380 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8381 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8382 effect(KILL cr); 8383 8384 ins_cost(150); 8385 format %{ "OR $dst,$src" %} 8386 opcode(0x09); /* Opcode 09 /r */ 8387 ins_encode( OpcP, RegMem( src, dst ) ); 8388 ins_pipe( ialu_mem_reg ); 8389 %} 8390 8391 // Or Memory with Immediate 8392 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8393 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8394 effect(KILL cr); 8395 8396 ins_cost(125); 8397 format %{ "OR $dst,$src" %} 8398 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8399 // ins_encode( MemImm( dst, src) ); 8400 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8401 ins_pipe( ialu_mem_imm ); 
8402 %} 8403 8404 // ROL/ROR 8405 // ROL expand 8406 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8407 effect(USE_DEF dst, USE shift, KILL cr); 8408 8409 format %{ "ROL $dst, $shift" %} 8410 opcode(0xD1, 0x0); /* Opcode D1 /0 */ 8411 ins_encode( OpcP, RegOpc( dst )); 8412 ins_pipe( ialu_reg ); 8413 %} 8414 8415 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8416 effect(USE_DEF dst, USE shift, KILL cr); 8417 8418 format %{ "ROL $dst, $shift" %} 8419 opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 8420 ins_encode( RegOpcImm(dst, shift) ); 8421 ins_pipe(ialu_reg); 8422 %} 8423 8424 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8425 effect(USE_DEF dst, USE shift, KILL cr); 8426 8427 format %{ "ROL $dst, $shift" %} 8428 opcode(0xD3, 0x0); /* Opcode D3 /0 */ 8429 ins_encode(OpcP, RegOpc(dst)); 8430 ins_pipe( ialu_reg_reg ); 8431 %} 8432 // end of ROL expand 8433 8434 // ROL 32bit by one once 8435 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 8436 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8437 8438 expand %{ 8439 rolI_eReg_imm1(dst, lshift, cr); 8440 %} 8441 %} 8442 8443 // ROL 32bit var by imm8 once 8444 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 8445 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8446 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8447 8448 expand %{ 8449 rolI_eReg_imm8(dst, lshift, cr); 8450 %} 8451 %} 8452 8453 // ROL 32bit var by var once 8454 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ 8455 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 8456 8457 expand %{ 8458 rolI_eReg_CL(dst, shift, cr); 8459 %} 8460 %} 8461 8462 // ROL 32bit var by var once 8463 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8464 match(Set dst ( OrI (LShiftI dst shift) 
(URShiftI dst (SubI c32 shift)))); 8465 8466 expand %{ 8467 rolI_eReg_CL(dst, shift, cr); 8468 %} 8469 %} 8470 8471 // ROR expand 8472 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8473 effect(USE_DEF dst, USE shift, KILL cr); 8474 8475 format %{ "ROR $dst, $shift" %} 8476 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8477 ins_encode( OpcP, RegOpc( dst ) ); 8478 ins_pipe( ialu_reg ); 8479 %} 8480 8481 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8482 effect (USE_DEF dst, USE shift, KILL cr); 8483 8484 format %{ "ROR $dst, $shift" %} 8485 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8486 ins_encode( RegOpcImm(dst, shift) ); 8487 ins_pipe( ialu_reg ); 8488 %} 8489 8490 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8491 effect(USE_DEF dst, USE shift, KILL cr); 8492 8493 format %{ "ROR $dst, $shift" %} 8494 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8495 ins_encode(OpcP, RegOpc(dst)); 8496 ins_pipe( ialu_reg_reg ); 8497 %} 8498 // end of ROR expand 8499 8500 // ROR right once 8501 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8502 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8503 8504 expand %{ 8505 rorI_eReg_imm1(dst, rshift, cr); 8506 %} 8507 %} 8508 8509 // ROR 32bit by immI8 once 8510 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8511 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8512 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8513 8514 expand %{ 8515 rorI_eReg_imm8(dst, rshift, cr); 8516 %} 8517 %} 8518 8519 // ROR 32bit var by var once 8520 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ 8521 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8522 8523 expand %{ 8524 rorI_eReg_CL(dst, shift, cr); 8525 %} 8526 %} 8527 8528 // ROR 32bit var by var once 8529 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, 
immI_32 c32, eFlagsReg cr) %{ 8530 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8531 8532 expand %{ 8533 rorI_eReg_CL(dst, shift, cr); 8534 %} 8535 %} 8536 8537 // Xor Instructions 8538 // Xor Register with Register 8539 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8540 match(Set dst (XorI dst src)); 8541 effect(KILL cr); 8542 8543 size(2); 8544 format %{ "XOR $dst,$src" %} 8545 opcode(0x33); 8546 ins_encode( OpcP, RegReg( dst, src) ); 8547 ins_pipe( ialu_reg_reg ); 8548 %} 8549 8550 // Xor Register with Immediate -1 8551 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8552 match(Set dst (XorI dst imm)); 8553 8554 size(2); 8555 format %{ "NOT $dst" %} 8556 ins_encode %{ 8557 __ notl($dst$$Register); 8558 %} 8559 ins_pipe( ialu_reg ); 8560 %} 8561 8562 // Xor Register with Immediate 8563 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8564 match(Set dst (XorI dst src)); 8565 effect(KILL cr); 8566 8567 format %{ "XOR $dst,$src" %} 8568 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8569 // ins_encode( RegImm( dst, src) ); 8570 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8571 ins_pipe( ialu_reg ); 8572 %} 8573 8574 // Xor Register with Memory 8575 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8576 match(Set dst (XorI dst (LoadI src))); 8577 effect(KILL cr); 8578 8579 ins_cost(150); 8580 format %{ "XOR $dst,$src" %} 8581 opcode(0x33); 8582 ins_encode( OpcP, RegMem(dst, src) ); 8583 ins_pipe( ialu_reg_mem ); 8584 %} 8585 8586 // Xor Memory with Register 8587 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8588 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8589 effect(KILL cr); 8590 8591 ins_cost(150); 8592 format %{ "XOR $dst,$src" %} 8593 opcode(0x31); /* Opcode 31 /r */ 8594 ins_encode( OpcP, RegMem( src, dst ) ); 8595 ins_pipe( ialu_mem_reg ); 8596 %} 8597 8598 // Xor Memory with Immediate 8599 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8600 match(Set 
dst (StoreI dst (XorI (LoadI dst) src))); 8601 effect(KILL cr); 8602 8603 ins_cost(125); 8604 format %{ "XOR $dst,$src" %} 8605 opcode(0x81,0x6); /* Opcode 81 /6 id */ 8606 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8607 ins_pipe( ialu_mem_imm ); 8608 %} 8609 8610 //----------Convert Int to Boolean--------------------------------------------- 8611 8612 instruct movI_nocopy(rRegI dst, rRegI src) %{ 8613 effect( DEF dst, USE src ); 8614 format %{ "MOV $dst,$src" %} 8615 ins_encode( enc_Copy( dst, src) ); 8616 ins_pipe( ialu_reg_reg ); 8617 %} 8618 8619 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8620 effect( USE_DEF dst, USE src, KILL cr ); 8621 8622 size(4); 8623 format %{ "NEG $dst\n\t" 8624 "ADC $dst,$src" %} 8625 ins_encode( neg_reg(dst), 8626 OpcRegReg(0x13,dst,src) ); 8627 ins_pipe( ialu_reg_reg_long ); 8628 %} 8629 8630 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8631 match(Set dst (Conv2B src)); 8632 8633 expand %{ 8634 movI_nocopy(dst,src); 8635 ci2b(dst,src,cr); 8636 %} 8637 %} 8638 8639 instruct movP_nocopy(rRegI dst, eRegP src) %{ 8640 effect( DEF dst, USE src ); 8641 format %{ "MOV $dst,$src" %} 8642 ins_encode( enc_Copy( dst, src) ); 8643 ins_pipe( ialu_reg_reg ); 8644 %} 8645 8646 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8647 effect( USE_DEF dst, USE src, KILL cr ); 8648 format %{ "NEG $dst\n\t" 8649 "ADC $dst,$src" %} 8650 ins_encode( neg_reg(dst), 8651 OpcRegReg(0x13,dst,src) ); 8652 ins_pipe( ialu_reg_reg_long ); 8653 %} 8654 8655 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8656 match(Set dst (Conv2B src)); 8657 8658 expand %{ 8659 movP_nocopy(dst,src); 8660 cp2b(dst,src,cr); 8661 %} 8662 %} 8663 8664 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ 8665 match(Set dst (CmpLTMask p q)); 8666 effect(KILL cr); 8667 ins_cost(400); 8668 8669 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination 8670 format %{ "XOR $dst,$dst\n\t" 8671 
"CMP $p,$q\n\t" 8672 "SETlt $dst\n\t" 8673 "NEG $dst" %} 8674 ins_encode %{ 8675 Register Rp = $p$$Register; 8676 Register Rq = $q$$Register; 8677 Register Rd = $dst$$Register; 8678 Label done; 8679 __ xorl(Rd, Rd); 8680 __ cmpl(Rp, Rq); 8681 __ setb(Assembler::less, Rd); 8682 __ negl(Rd); 8683 %} 8684 8685 ins_pipe(pipe_slow); 8686 %} 8687 8688 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{ 8689 match(Set dst (CmpLTMask dst zero)); 8690 effect(DEF dst, KILL cr); 8691 ins_cost(100); 8692 8693 format %{ "SAR $dst,31\t# cmpLTMask0" %} 8694 ins_encode %{ 8695 __ sarl($dst$$Register, 31); 8696 %} 8697 ins_pipe(ialu_reg); 8698 %} 8699 8700 /* better to save a register than avoid a branch */ 8701 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8702 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); 8703 effect(KILL cr); 8704 ins_cost(400); 8705 format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t" 8706 "JGE done\n\t" 8707 "ADD $p,$y\n" 8708 "done: " %} 8709 ins_encode %{ 8710 Register Rp = $p$$Register; 8711 Register Rq = $q$$Register; 8712 Register Ry = $y$$Register; 8713 Label done; 8714 __ subl(Rp, Rq); 8715 __ jccb(Assembler::greaterEqual, done); 8716 __ addl(Rp, Ry); 8717 __ bind(done); 8718 %} 8719 8720 ins_pipe(pipe_cmplt); 8721 %} 8722 8723 /* better to save a register than avoid a branch */ 8724 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8725 match(Set y (AndI (CmpLTMask p q) y)); 8726 effect(KILL cr); 8727 8728 ins_cost(300); 8729 8730 format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t" 8731 "JLT done\n\t" 8732 "XORL $y, $y\n" 8733 "done: " %} 8734 ins_encode %{ 8735 Register Rp = $p$$Register; 8736 Register Rq = $q$$Register; 8737 Register Ry = $y$$Register; 8738 Label done; 8739 __ cmpl(Rp, Rq); 8740 __ jccb(Assembler::less, done); 8741 __ xorl(Ry, Ry); 8742 __ bind(done); 8743 %} 8744 8745 ins_pipe(pipe_cmplt); 8746 %} 8747 8748 /* If I enable this, I encourage spilling in the inner loop of compress. 
8749 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ 8750 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); 8751 */ 8752 //----------Overflow Math Instructions----------------------------------------- 8753 8754 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8755 %{ 8756 match(Set cr (OverflowAddI op1 op2)); 8757 effect(DEF cr, USE_KILL op1, USE op2); 8758 8759 format %{ "ADD $op1, $op2\t# overflow check int" %} 8760 8761 ins_encode %{ 8762 __ addl($op1$$Register, $op2$$Register); 8763 %} 8764 ins_pipe(ialu_reg_reg); 8765 %} 8766 8767 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) 8768 %{ 8769 match(Set cr (OverflowAddI op1 op2)); 8770 effect(DEF cr, USE_KILL op1, USE op2); 8771 8772 format %{ "ADD $op1, $op2\t# overflow check int" %} 8773 8774 ins_encode %{ 8775 __ addl($op1$$Register, $op2$$constant); 8776 %} 8777 ins_pipe(ialu_reg_reg); 8778 %} 8779 8780 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) 8781 %{ 8782 match(Set cr (OverflowSubI op1 op2)); 8783 8784 format %{ "CMP $op1, $op2\t# overflow check int" %} 8785 ins_encode %{ 8786 __ cmpl($op1$$Register, $op2$$Register); 8787 %} 8788 ins_pipe(ialu_reg_reg); 8789 %} 8790 8791 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) 8792 %{ 8793 match(Set cr (OverflowSubI op1 op2)); 8794 8795 format %{ "CMP $op1, $op2\t# overflow check int" %} 8796 ins_encode %{ 8797 __ cmpl($op1$$Register, $op2$$constant); 8798 %} 8799 ins_pipe(ialu_reg_reg); 8800 %} 8801 8802 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2) 8803 %{ 8804 match(Set cr (OverflowSubI zero op2)); 8805 effect(DEF cr, USE_KILL op2); 8806 8807 format %{ "NEG $op2\t# overflow check int" %} 8808 ins_encode %{ 8809 __ negl($op2$$Register); 8810 %} 8811 ins_pipe(ialu_reg_reg); 8812 %} 8813 8814 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8815 %{ 8816 match(Set cr (OverflowMulI op1 op2)); 8817 
effect(DEF cr, USE_KILL op1, USE op2); 8818 8819 format %{ "IMUL $op1, $op2\t# overflow check int" %} 8820 ins_encode %{ 8821 __ imull($op1$$Register, $op2$$Register); 8822 %} 8823 ins_pipe(ialu_reg_reg_alu0); 8824 %} 8825 8826 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) 8827 %{ 8828 match(Set cr (OverflowMulI op1 op2)); 8829 effect(DEF cr, TEMP tmp, USE op1, USE op2); 8830 8831 format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} 8832 ins_encode %{ 8833 __ imull($tmp$$Register, $op1$$Register, $op2$$constant); 8834 %} 8835 ins_pipe(ialu_reg_reg_alu0); 8836 %} 8837 8838 // Integer Absolute Instructions 8839 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr) 8840 %{ 8841 match(Set dst (AbsI src)); 8842 effect(TEMP dst, TEMP tmp, KILL cr); 8843 format %{ "movl $tmp, $src\n\t" 8844 "sarl $tmp, 31\n\t" 8845 "movl $dst, $src\n\t" 8846 "xorl $dst, $tmp\n\t" 8847 "subl $dst, $tmp\n" 8848 %} 8849 ins_encode %{ 8850 __ movl($tmp$$Register, $src$$Register); 8851 __ sarl($tmp$$Register, 31); 8852 __ movl($dst$$Register, $src$$Register); 8853 __ xorl($dst$$Register, $tmp$$Register); 8854 __ subl($dst$$Register, $tmp$$Register); 8855 %} 8856 8857 ins_pipe(ialu_reg_reg); 8858 %} 8859 8860 //----------Long Instructions------------------------------------------------ 8861 // Add Long Register with Register 8862 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8863 match(Set dst (AddL dst src)); 8864 effect(KILL cr); 8865 ins_cost(200); 8866 format %{ "ADD $dst.lo,$src.lo\n\t" 8867 "ADC $dst.hi,$src.hi" %} 8868 opcode(0x03, 0x13); 8869 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8870 ins_pipe( ialu_reg_reg_long ); 8871 %} 8872 8873 // Add Long Register with Immediate 8874 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8875 match(Set dst (AddL dst src)); 8876 effect(KILL cr); 8877 format %{ "ADD $dst.lo,$src.lo\n\t" 8878 "ADC $dst.hi,$src.hi" %} 8879 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 
*/ 8880 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8881 ins_pipe( ialu_reg_long ); 8882 %} 8883 8884 // Add Long Register with Memory 8885 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8886 match(Set dst (AddL dst (LoadL mem))); 8887 effect(KILL cr); 8888 ins_cost(125); 8889 format %{ "ADD $dst.lo,$mem\n\t" 8890 "ADC $dst.hi,$mem+4" %} 8891 opcode(0x03, 0x13); 8892 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8893 ins_pipe( ialu_reg_long_mem ); 8894 %} 8895 8896 // Subtract Long Register with Register. 8897 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8898 match(Set dst (SubL dst src)); 8899 effect(KILL cr); 8900 ins_cost(200); 8901 format %{ "SUB $dst.lo,$src.lo\n\t" 8902 "SBB $dst.hi,$src.hi" %} 8903 opcode(0x2B, 0x1B); 8904 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8905 ins_pipe( ialu_reg_reg_long ); 8906 %} 8907 8908 // Subtract Long Register with Immediate 8909 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8910 match(Set dst (SubL dst src)); 8911 effect(KILL cr); 8912 format %{ "SUB $dst.lo,$src.lo\n\t" 8913 "SBB $dst.hi,$src.hi" %} 8914 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8915 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8916 ins_pipe( ialu_reg_long ); 8917 %} 8918 8919 // Subtract Long Register with Memory 8920 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8921 match(Set dst (SubL dst (LoadL mem))); 8922 effect(KILL cr); 8923 ins_cost(125); 8924 format %{ "SUB $dst.lo,$mem\n\t" 8925 "SBB $dst.hi,$mem+4" %} 8926 opcode(0x2B, 0x1B); 8927 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8928 ins_pipe( ialu_reg_long_mem ); 8929 %} 8930 8931 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8932 match(Set dst (SubL zero dst)); 8933 effect(KILL cr); 8934 ins_cost(300); 8935 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8936 ins_encode( 
neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// NOTE: long (64-bit) operands on x86-32 live in register pairs.
// $x.lo is the named register; $x.hi is its companion register,
// reached in C++ encodings via HIGH_FROM_LOW().

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Matches (~src1) & src2, expressed in the ideal graph as (src1 ^ -1) & src2.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
         %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

// ANDN with a memory operand; the high word is addressed at $src2+4.
instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
         %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // Same base/index/scale as $src2, displaced by 4 to reach the high word.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: matches src & (0 - src), i.e. isolate the lowest set bit.
// The high half of the result is pre-zeroed; if the low-word BLSI yields a
// non-zero result the lowest set bit was in the low word and the high-word
// BLSI is skipped (NOTE: relies on BLSIL setting ZF - see Intel SDM).
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Memory form of BLSI; identical skip-the-high-word structure.
instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: matches src ^ (src - 1), i.e. mask up to (and including) the
// lowest set bit.  The high-word step is skipped when the low-word
// operation did not carry (NOTE: relies on BLSMSKL's CF - see Intel SDM).
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: matches src & (src - 1), i.e. clear the lowest set bit.
// The high word is copied through first; it is only modified when the
// low-word subtraction borrowed into the high word (carry set).
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// XOR with all-ones is a bitwise NOT; no eFlagsReg effect is declared
// here, unlike the other XOR forms (NOTE: NOT leaves EFLAGS unchanged).
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
// Small left shifts are expanded as ADD/ADC chains: ADD doubles the low
// word and ADC propagates its carry into the doubled high word.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
// Counts >= 32 are handled explicitly (TEST $shift,32) by moving the low
// word into the high word and zeroing the low word before the SHLD/SHL
// pair, which only honors the low 5 bits of the count (see format).
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
// Arithmetic variant: the vacated high word is filled with the sign
// (SAR $dst.hi,31) rather than zeroed.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

//----------Double Instructions------------------------------------------------
// Double Math
//
// NOTE: the DPR instructs below are the x87 (FPU-stack) forms, selected
// when UseSSE<=1; the plain-D forms use SSE2 XMM registers (UseSSE>=2).

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
// FUCOMIP writes EFLAGS directly; the fixup forces CF when a NaN was seen
// so unordered compares as "less than" (see format).
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Same as above without the NaN fixup (unordered-flags register class).
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
// Pre-P6 path: compare, pull the FPU status word through AX, then SAHF.
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}


instruct subDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DSUB ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


instruct subDPR_reg_mem(regDPR dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// AbsD/NegD operate on the top-of-stack register only (regDPR1).
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DADD ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DADDp $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D $dst\n\t"
            "DADD ST,$src\n\t"
            "FST_D $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}

// Add the constant 1.0 using the dedicated FLD1 instruction.
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// General constant add; the predicate excludes 0.0 and 1.0, which have
// cheaper dedicated forms (see addDPR_reg_imm1 above).
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}

instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (MulD dst src));
  ins_cost(1); // Select this instruction for all FP double multiplies

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}


instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}


// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // FIX: this instruct declared predicate twice ("predicate (UseSSE<=1);"
  // followed by the has_method() form).  An instruct takes a single
  // predicate; the UseSSE<=1 test is already part of the conjunction
  // below (matching strictfp_mulDPR_reg above), so the duplicate is
  // removed.
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(01); // Select this instruction for all FP double divides

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// x87 double remainder (ModD): the helper encodings iterate FPREM until
// the partial remainder converges; EAX and EFLAGS are clobbered.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE2 ModD: spill both XMM operands to the stack, run the x87 FPREM
// loop, then reload the result into an XMM register (see format).
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
          "\tMOVSD [ESP+0],$src1\n"
          "\tFLD_D [ESP+0]\n"
          "\tMOVSD [ESP+0],$src0\n"
          "\tFLD_D [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD $dst,[ESP+0]\n"
          "\tADD ESP,8\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

instruct atanDPR_reg(regDPR dst,
regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

// SSE2 AtanD: operands are spilled to the stack so the x87 opcode
// (D9 F3) can be used, then the result is reloaded into XMM.
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}

//-------------Float Instructions-------------------------------
// Float Math

// Code for float compare:
//     fcompp();
//     fwait(); fnstsw_ax();
//     sahf();
//     movl(dst, unordered_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);
//   exit:

// P6 version of float compare, sets condition codes in EFLAGS
// Mirrors cmpDPR_cc_P6 for single precision; NaN is fixed up so that
// unordered compares as "less than" (see format).
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1,
regFPR src2) %{ 10231 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10232 match(Set dst (AddF src1 src2)); 10233 10234 format %{ "FADD $dst,$src1,$src2" %} 10235 opcode(0xD8, 0x0); /* D8 C0+i */ 10236 ins_encode( Push_Reg_FPR(src2), 10237 OpcReg_FPR(src1), 10238 Pop_Mem_FPR(dst) ); 10239 ins_pipe( fpu_mem_reg_reg ); 10240 %} 10241 // 10242 // This instruction does not round to 24-bits 10243 instruct addFPR_reg(regFPR dst, regFPR src) %{ 10244 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10245 match(Set dst (AddF dst src)); 10246 10247 format %{ "FLD $src\n\t" 10248 "FADDp $dst,ST" %} 10249 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10250 ins_encode( Push_Reg_FPR(src), 10251 OpcP, RegOpc(dst) ); 10252 ins_pipe( fpu_reg_reg ); 10253 %} 10254 10255 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10256 predicate(UseSSE==0); 10257 match(Set dst (AbsF src)); 10258 ins_cost(100); 10259 format %{ "FABS" %} 10260 opcode(0xE1, 0xD9); 10261 ins_encode( OpcS, OpcP ); 10262 ins_pipe( fpu_reg_reg ); 10263 %} 10264 10265 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10266 predicate(UseSSE==0); 10267 match(Set dst (NegF src)); 10268 ins_cost(100); 10269 format %{ "FCHS" %} 10270 opcode(0xE0, 0xD9); 10271 ins_encode( OpcS, OpcP ); 10272 ins_pipe( fpu_reg_reg ); 10273 %} 10274 10275 // Cisc-alternate to addFPR_reg 10276 // Spill to obtain 24-bit precision 10277 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10278 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10279 match(Set dst (AddF src1 (LoadF src2))); 10280 10281 format %{ "FLD $src2\n\t" 10282 "FADD ST,$src1\n\t" 10283 "FSTP_S $dst" %} 10284 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10285 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10286 OpcReg_FPR(src1), 10287 Pop_Mem_FPR(dst) ); 10288 ins_pipe( fpu_mem_reg_mem ); 10289 %} 10290 // 10291 // Cisc-alternate to addFPR_reg 10292 // This instruction does not round to 
// 24-bits (continuation of the preceding comment:
// this instruction does not round to 24-bits)
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
// Adds a float constant loaded from the constant table.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// NOTE(review): format shows "FSTP_S" but the encoding pops to a register
// (Pop_Reg_FPR) — disassembly text only; confirm against the encoding.
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}
// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Float remainder with SSE operands: operands are bounced through the
// stack to the x87 FPREM loop and the result moved back to XMM.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!

instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // FPR1 is already at top-of-stack; anything else must be loaded first.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// 0x80000000 is the hardware's integer-indefinite result; on seeing it we
// take the d2i_wrapper slow path to apply Java corner-case semantics.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // EDX:EAX == 0x80000000:00000000 is the indefinite result; go slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
// Convert a float in XMM to an int register; 0x80000000 (the hardware's
// indefinite result) triggers the d2i_wrapper slow path for Java semantics.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert an x87 float to a long (UseSSE==0 configuration).
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
// Float in XMM to long: spill to the stack, convert on the x87 FPU with
// truncating rounding, and take the d2l_wrapper slow path on the
// indefinite result (EDX:EAX == 0x80000000:00000000).
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Int (spilled to stack) to double on the x87 FPU.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// i2d via MOVD+CVTDQ2PD, selected by the UseXmmI2D flag.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Memory-operand (cisc) form: FILD straight from memory.
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// A byte-range int (the ideal graph shows an AndI with constant 255)
// converts to float exactly, so even in 24-bit mode no rounding store
// is needed.  The predicate pattern-matches the (AndI x 255) input.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// i2f via MOVD + CVTDQ2PS; selected when UseXmmI2F is enabled.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend convert int to long: copy into both halves, then
// arithmetic-shift the high half to replicate the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// l2d via x87 FILD/FSTP: push the long, integer-load it, round through
// a double store to the destination stack slot.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// l2d for SSE2: convert through the x87 unit, then move the rounded
// double from the stack into the XMM destination.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// l2f for SSE: convert through the x87 unit with a single-precision
// rounding store, then load the float into XMM.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// l2f fallback to a float stack slot (no predicate: matches when the
// SSE variants do not).
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// l2i is just a copy of the low half of the long.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Raw float bits, stack slot -> GPR: a plain 32-bit load.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// Raw float bits, x87 register -> int stack slot.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Raw float bits, XMM register -> int stack slot.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Raw float bits, XMM register -> GPR directly (cheapest form).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Raw int bits, GPR -> float stack slot: a plain 32-bit store.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// Raw int bits, int stack slot -> x87 register.
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);  /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Raw int bits, int stack slot -> XMM register.
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Raw int bits, GPR -> XMM register directly (cheapest form).
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Raw double bits, stack slot -> long register pair (two 32-bit loads).
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// Raw double bits, x87 register -> long stack slot.
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Raw double bits, XMM register -> long stack slot.
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Raw double bits, XMM register -> long register pair: MOVD the low
// word, shuffle the high word down in a temp, MOVD it out.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    // 0x4e swaps the two 32-bit halves of the low quadword, exposing
    // the high word of the double in the low lane of tmp.
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Raw long bits, register pair -> double stack slot (two 32-bit stores).
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// Raw long bits, long stack slot -> x87 register.
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);  /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// Raw long bits, long stack slot -> XMM register (full-register load).
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Same operation when UseXmmLoadAndClearUpper is off; the format shows
// MOVLPD and movdbl() presumably selects the encoding based on the
// flag -- confirm against MacroAssembler::movdbl.
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Raw long bits, register pair -> XMM: MOVD each half and interleave.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//----------------------------- CompressBits/ExpandBits ------------------------

// 64-bit CompressBits (PEXT) composed from two 32-bit PEXT operations
// on the register pair, with the partial results merged by shifting.
instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src mask));
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    Label exit, partial_result;
    // Extract the upper and lower 32 bits of the source into the
    // destination register pair in parallel.
    // Merge the results of upper and lower destination registers such that upper destination
    // results are contiguously laid out after the lower destination result.
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
    __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
    __ popcntl($rtmp$$Register, $mask$$Register);
    // Skip merging if bit count of lower mask register is equal to 32 (register size).
    __ cmpl($rtmp$$Register, 32);
    __ jccb(Assembler::equal, exit);
    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift left the contents of upper destination register by true bit count of lower mask register
    // and merge with lower destination register.
    __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ orl($dst$$Register, $rtmp$$Register);
    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    // Zero out upper destination register if true bit count of lower 32 bit mask is zero
    // since contents of upper destination have already been copied to lower destination
    // register.
    __ cmpl($rtmp$$Register, 0);
    __ jccb(Assembler::greater, partial_result);
    __ movl(HIGH_FROM_LOW($dst$$Register), 0);
    __ jmp(exit);
    __ bind(partial_result);
    // Perform right shift over upper destination register to move out bits already copied
    // to lower destination register.
    __ subl($rtmp$$Register, 32);
    __ negl($rtmp$$Register);
    __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ bind(exit);
  %}
  ins_pipe( pipe_slow );
%}

// 64-bit ExpandBits (PDEP) composed from 32-bit PDEP operations on the
// register pair, fixing up the bits that straddle the 32-bit boundary.
instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src mask));
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    // Extraction operation sequentially reads the bits from source register starting from LSB
    // and lays them out into destination register at bit locations corresponding to true bits
    // in mask register. Thus number of source bits read are equal to combined true bit count
    // of mask register pair.
    Label exit, mask_clipping;
    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
    __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
    __ popcntl($rtmp$$Register, $mask$$Register);
    // If the true bit count of the lower mask register is 32, no bits of the
    // lower source register feed into the upper destination register.
    __ cmpl($rtmp$$Register, 32);
    __ jccb(Assembler::equal, exit);
    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift right the contents of lower source register to remove already consumed bits.
    __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
    // Extract the bits from lower source register starting from LSB under the influence
    // of upper mask register.
    __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    __ subl($rtmp$$Register, 32);
    __ negl($rtmp$$Register);
    __ movdl($xtmp$$XMMRegister, $mask$$Register);
    __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
    // Clear the set bits in upper mask register which have been used to extract the contents
    // from lower source register.
    __ bind(mask_clipping);
    __ blsrl($mask$$Register, $mask$$Register);
    __ decrementl($rtmp$$Register, 1);
    __ jccb(Assembler::greater, mask_clipping);
    // Starting from LSB extract the bits from upper source register under the influence of
    // remaining set bits in upper mask register.
    __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
    // Merge the partial results extracted from lower and upper source register bits.
    __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ movdl($mask$$Register, $xtmp$$XMMRegister);
    __ bind(exit);
  %}
  ins_pipe( pipe_slow );
%}

// =======================================================================
// fast clearing of an array
// Small ClearArray non-AVX512.
11563 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11564 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2)); 11565 match(Set dummy (ClearArray cnt base)); 11566 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); 11567 11568 format %{ $$template 11569 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11570 $$emit$$"CMP InitArrayShortSize,rcx\n\t" 11571 $$emit$$"JG LARGE\n\t" 11572 $$emit$$"SHL ECX, 1\n\t" 11573 $$emit$$"DEC ECX\n\t" 11574 $$emit$$"JS DONE\t# Zero length\n\t" 11575 $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t" 11576 $$emit$$"DEC ECX\n\t" 11577 $$emit$$"JGE LOOP\n\t" 11578 $$emit$$"JMP DONE\n\t" 11579 $$emit$$"# LARGE:\n\t" 11580 if (UseFastStosb) { 11581 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11582 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11583 } else if (UseXMMForObjInit) { 11584 $$emit$$"MOV RDI,RAX\n\t" 11585 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11586 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11587 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11588 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11589 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11590 $$emit$$"ADD 0x40,RAX\n\t" 11591 $$emit$$"# L_zero_64_bytes:\n\t" 11592 $$emit$$"SUB 0x8,RCX\n\t" 11593 $$emit$$"JGE L_loop\n\t" 11594 $$emit$$"ADD 0x4,RCX\n\t" 11595 $$emit$$"JL L_tail\n\t" 11596 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11597 $$emit$$"ADD 0x20,RAX\n\t" 11598 $$emit$$"SUB 0x4,RCX\n\t" 11599 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11600 $$emit$$"ADD 0x4,RCX\n\t" 11601 $$emit$$"JLE L_end\n\t" 11602 $$emit$$"DEC RCX\n\t" 11603 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11604 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11605 $$emit$$"ADD 0x8,RAX\n\t" 11606 $$emit$$"DEC RCX\n\t" 11607 $$emit$$"JGE L_sloop\n\t" 11608 $$emit$$"# L_end:\n\t" 11609 } else { 11610 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11611 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11612 } 11613 $$emit$$"# DONE" 11614 %} 11615 
ins_encode %{ 11616 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11617 $tmp$$XMMRegister, false, knoreg); 11618 %} 11619 ins_pipe( pipe_slow ); 11620 %} 11621 11622 // Small ClearArray AVX512 non-constant length. 11623 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11624 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2)); 11625 match(Set dummy (ClearArray cnt base)); 11626 ins_cost(125); 11627 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); 11628 11629 format %{ $$template 11630 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11631 $$emit$$"CMP InitArrayShortSize,rcx\n\t" 11632 $$emit$$"JG LARGE\n\t" 11633 $$emit$$"SHL ECX, 1\n\t" 11634 $$emit$$"DEC ECX\n\t" 11635 $$emit$$"JS DONE\t# Zero length\n\t" 11636 $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t" 11637 $$emit$$"DEC ECX\n\t" 11638 $$emit$$"JGE LOOP\n\t" 11639 $$emit$$"JMP DONE\n\t" 11640 $$emit$$"# LARGE:\n\t" 11641 if (UseFastStosb) { 11642 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11643 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11644 } else if (UseXMMForObjInit) { 11645 $$emit$$"MOV RDI,RAX\n\t" 11646 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11647 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11648 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11649 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11650 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11651 $$emit$$"ADD 0x40,RAX\n\t" 11652 $$emit$$"# L_zero_64_bytes:\n\t" 11653 $$emit$$"SUB 0x8,RCX\n\t" 11654 $$emit$$"JGE L_loop\n\t" 11655 $$emit$$"ADD 0x4,RCX\n\t" 11656 $$emit$$"JL L_tail\n\t" 11657 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11658 $$emit$$"ADD 0x20,RAX\n\t" 11659 $$emit$$"SUB 0x4,RCX\n\t" 11660 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11661 $$emit$$"ADD 0x4,RCX\n\t" 11662 $$emit$$"JLE L_end\n\t" 11663 $$emit$$"DEC RCX\n\t" 11664 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11665 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11666 $$emit$$"ADD 0x8,RAX\n\t" 
11667 $$emit$$"DEC RCX\n\t" 11668 $$emit$$"JGE L_sloop\n\t" 11669 $$emit$$"# L_end:\n\t" 11670 } else { 11671 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11672 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11673 } 11674 $$emit$$"# DONE" 11675 %} 11676 ins_encode %{ 11677 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11678 $tmp$$XMMRegister, false, $ktmp$$KRegister); 11679 %} 11680 ins_pipe( pipe_slow ); 11681 %} 11682 11683 // Large ClearArray non-AVX512. 11684 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11685 predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large()); 11686 match(Set dummy (ClearArray cnt base)); 11687 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); 11688 format %{ $$template 11689 if (UseFastStosb) { 11690 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11691 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11692 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11693 } else if (UseXMMForObjInit) { 11694 $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t" 11695 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11696 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11697 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11698 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11699 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11700 $$emit$$"ADD 0x40,RAX\n\t" 11701 $$emit$$"# L_zero_64_bytes:\n\t" 11702 $$emit$$"SUB 0x8,RCX\n\t" 11703 $$emit$$"JGE L_loop\n\t" 11704 $$emit$$"ADD 0x4,RCX\n\t" 11705 $$emit$$"JL L_tail\n\t" 11706 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11707 $$emit$$"ADD 0x20,RAX\n\t" 11708 $$emit$$"SUB 0x4,RCX\n\t" 11709 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11710 $$emit$$"ADD 0x4,RCX\n\t" 11711 $$emit$$"JLE L_end\n\t" 11712 $$emit$$"DEC RCX\n\t" 11713 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11714 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11715 $$emit$$"ADD 0x8,RAX\n\t" 11716 $$emit$$"DEC RCX\n\t" 11717 $$emit$$"JGE L_sloop\n\t" 11718 $$emit$$"# L_end:\n\t" 11719 } else { 
11720 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11721 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11722 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11723 } 11724 $$emit$$"# DONE" 11725 %} 11726 ins_encode %{ 11727 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11728 $tmp$$XMMRegister, true, knoreg); 11729 %} 11730 ins_pipe( pipe_slow ); 11731 %} 11732 11733 // Large ClearArray AVX512. 11734 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11735 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large()); 11736 match(Set dummy (ClearArray cnt base)); 11737 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); 11738 format %{ $$template 11739 if (UseFastStosb) { 11740 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11741 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11742 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11743 } else if (UseXMMForObjInit) { 11744 $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t" 11745 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11746 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11747 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11748 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11749 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11750 $$emit$$"ADD 0x40,RAX\n\t" 11751 $$emit$$"# L_zero_64_bytes:\n\t" 11752 $$emit$$"SUB 0x8,RCX\n\t" 11753 $$emit$$"JGE L_loop\n\t" 11754 $$emit$$"ADD 0x4,RCX\n\t" 11755 $$emit$$"JL L_tail\n\t" 11756 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11757 $$emit$$"ADD 0x20,RAX\n\t" 11758 $$emit$$"SUB 0x4,RCX\n\t" 11759 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11760 $$emit$$"ADD 0x4,RCX\n\t" 11761 $$emit$$"JLE L_end\n\t" 11762 $$emit$$"DEC RCX\n\t" 11763 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11764 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11765 $$emit$$"ADD 0x8,RAX\n\t" 11766 $$emit$$"DEC RCX\n\t" 11767 $$emit$$"JGE L_sloop\n\t" 11768 $$emit$$"# L_end:\n\t" 11769 } else { 11770 $$emit$$"XOR EAX,EAX\t# 
ClearArray:\n\t" 11771 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11772 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11773 } 11774 $$emit$$"# DONE" 11775 %} 11776 ins_encode %{ 11777 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11778 $tmp$$XMMRegister, true, $ktmp$$KRegister); 11779 %} 11780 ins_pipe( pipe_slow ); 11781 %} 11782 11783 // Small ClearArray AVX512 constant length. 11784 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr) 11785 %{ 11786 predicate(!((ClearArrayNode*)n)->is_large() && 11787 ((UseAVX > 2) && VM_Version::supports_avx512vlbw())); 11788 match(Set dummy (ClearArray cnt base)); 11789 ins_cost(100); 11790 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr); 11791 format %{ "clear_mem_imm $base , $cnt \n\t" %} 11792 ins_encode %{ 11793 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister); 11794 %} 11795 ins_pipe(pipe_slow); 11796 %} 11797 11798 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11799 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11800 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11801 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11802 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11803 11804 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11805 ins_encode %{ 11806 __ string_compare($str1$$Register, $str2$$Register, 11807 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11808 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg); 11809 %} 11810 ins_pipe( pipe_slow ); 11811 %} 11812 11813 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11814 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ 11815 predicate(VM_Version::supports_avx512vlbw() && 
((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11816 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11817 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11818 11819 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11820 ins_encode %{ 11821 __ string_compare($str1$$Register, $str2$$Register, 11822 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11823 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister); 11824 %} 11825 ins_pipe( pipe_slow ); 11826 %} 11827 11828 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11829 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11830 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11831 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11832 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11833 11834 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11835 ins_encode %{ 11836 __ string_compare($str1$$Register, $str2$$Register, 11837 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11838 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg); 11839 %} 11840 ins_pipe( pipe_slow ); 11841 %} 11842 11843 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11844 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ 11845 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11846 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11847 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11848 11849 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11850 ins_encode %{ 11851 __ string_compare($str1$$Register, $str2$$Register, 11852 $cnt1$$Register, 
$cnt2$$Register, $result$$Register, 11853 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister); 11854 %} 11855 ins_pipe( pipe_slow ); 11856 %} 11857 11858 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11859 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11860 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11861 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11862 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11863 11864 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11865 ins_encode %{ 11866 __ string_compare($str1$$Register, $str2$$Register, 11867 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11868 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg); 11869 %} 11870 ins_pipe( pipe_slow ); 11871 %} 11872 11873 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11874 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ 11875 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11876 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11877 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11878 11879 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11880 ins_encode %{ 11881 __ string_compare($str1$$Register, $str2$$Register, 11882 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11883 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister); 11884 %} 11885 ins_pipe( pipe_slow ); 11886 %} 11887 11888 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, 11889 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11890 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); 11891 match(Set result (StrComp (Binary 
str1 cnt1) (Binary str2 cnt2))); 11892 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11893 11894 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11895 ins_encode %{ 11896 __ string_compare($str2$$Register, $str1$$Register, 11897 $cnt2$$Register, $cnt1$$Register, $result$$Register, 11898 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg); 11899 %} 11900 ins_pipe( pipe_slow ); 11901 %} 11902 11903 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, 11904 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ 11905 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); 11906 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11907 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11908 11909 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11910 ins_encode %{ 11911 __ string_compare($str2$$Register, $str1$$Register, 11912 $cnt2$$Register, $cnt1$$Register, $result$$Register, 11913 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister); 11914 %} 11915 ins_pipe( pipe_slow ); 11916 %} 11917 11918 // fast string equals 11919 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11920 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11921 predicate(!VM_Version::supports_avx512vlbw()); 11922 match(Set result (StrEquals (Binary str1 str2) cnt)); 11923 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11924 11925 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11926 ins_encode %{ 11927 __ arrays_equals(false, $str1$$Register, $str2$$Register, 11928 $cnt$$Register, $result$$Register, $tmp3$$Register, 11929 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg); 11930 %} 11931 11932 ins_pipe( 
pipe_slow ); 11933 %} 11934 11935 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11936 regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{ 11937 predicate(VM_Version::supports_avx512vlbw()); 11938 match(Set result (StrEquals (Binary str1 str2) cnt)); 11939 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11940 11941 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11942 ins_encode %{ 11943 __ arrays_equals(false, $str1$$Register, $str2$$Register, 11944 $cnt$$Register, $result$$Register, $tmp3$$Register, 11945 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister); 11946 %} 11947 11948 ins_pipe( pipe_slow ); 11949 %} 11950 11951 11952 // fast search of substring with known size. 11953 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11954 eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11955 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11956 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11957 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11958 11959 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %} 11960 ins_encode %{ 11961 int icnt2 = (int)$int_cnt2$$constant; 11962 if (icnt2 >= 16) { 11963 // IndexOf for constant substrings with size >= 16 elements 11964 // which don't need to be loaded through stack. 11965 __ string_indexofC8($str1$$Register, $str2$$Register, 11966 $cnt1$$Register, $cnt2$$Register, 11967 icnt2, $result$$Register, 11968 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11969 } else { 11970 // Small strings are loaded through stack if they cross page boundary. 
11971 __ string_indexof($str1$$Register, $str2$$Register, 11972 $cnt1$$Register, $cnt2$$Register, 11973 icnt2, $result$$Register, 11974 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11975 } 11976 %} 11977 ins_pipe( pipe_slow ); 11978 %} 11979 11980 // fast search of substring with known size. 11981 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11982 eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11983 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11984 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11985 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11986 11987 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %} 11988 ins_encode %{ 11989 int icnt2 = (int)$int_cnt2$$constant; 11990 if (icnt2 >= 8) { 11991 // IndexOf for constant substrings with size >= 8 elements 11992 // which don't need to be loaded through stack. 11993 __ string_indexofC8($str1$$Register, $str2$$Register, 11994 $cnt1$$Register, $cnt2$$Register, 11995 icnt2, $result$$Register, 11996 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11997 } else { 11998 // Small strings are loaded through stack if they cross page boundary. 11999 __ string_indexof($str1$$Register, $str2$$Register, 12000 $cnt1$$Register, $cnt2$$Register, 12001 icnt2, $result$$Register, 12002 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 12003 } 12004 %} 12005 ins_pipe( pipe_slow ); 12006 %} 12007 12008 // fast search of substring with known size. 
12009 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 12010 eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 12011 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 12012 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 12013 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 12014 12015 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %} 12016 ins_encode %{ 12017 int icnt2 = (int)$int_cnt2$$constant; 12018 if (icnt2 >= 8) { 12019 // IndexOf for constant substrings with size >= 8 elements 12020 // which don't need to be loaded through stack. 12021 __ string_indexofC8($str1$$Register, $str2$$Register, 12022 $cnt1$$Register, $cnt2$$Register, 12023 icnt2, $result$$Register, 12024 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 12025 } else { 12026 // Small strings are loaded through stack if they cross page boundary. 
12027 __ string_indexof($str1$$Register, $str2$$Register, 12028 $cnt1$$Register, $cnt2$$Register, 12029 icnt2, $result$$Register, 12030 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 12031 } 12032 %} 12033 ins_pipe( pipe_slow ); 12034 %} 12035 12036 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 12037 eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{ 12038 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 12039 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 12040 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 12041 12042 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 12043 ins_encode %{ 12044 __ string_indexof($str1$$Register, $str2$$Register, 12045 $cnt1$$Register, $cnt2$$Register, 12046 (-1), $result$$Register, 12047 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 12048 %} 12049 ins_pipe( pipe_slow ); 12050 %} 12051 12052 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 12053 eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{ 12054 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 12055 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 12056 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 12057 12058 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 12059 ins_encode %{ 12060 __ string_indexof($str1$$Register, $str2$$Register, 12061 $cnt1$$Register, $cnt2$$Register, 12062 (-1), $result$$Register, 12063 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 12064 %} 12065 ins_pipe( pipe_slow ); 12066 %} 12067 12068 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 12069 eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{ 12070 
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 12071 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 12072 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 12073 12074 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 12075 ins_encode %{ 12076 __ string_indexof($str1$$Register, $str2$$Register, 12077 $cnt1$$Register, $cnt2$$Register, 12078 (-1), $result$$Register, 12079 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 12080 %} 12081 ins_pipe( pipe_slow ); 12082 %} 12083 12084 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, 12085 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ 12086 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); 12087 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); 12088 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); 12089 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} 12090 ins_encode %{ 12091 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, 12092 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); 12093 %} 12094 ins_pipe( pipe_slow ); 12095 %} 12096 12097 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, 12098 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ 12099 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); 12100 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); 12101 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); 12102 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} 12103 ins_encode %{ 12104 __ 
stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, 12105 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); 12106 %} 12107 ins_pipe( pipe_slow ); 12108 %} 12109 12110 12111 // fast array equals 12112 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12113 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 12114 %{ 12115 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 12116 match(Set result (AryEq ary1 ary2)); 12117 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12118 //ins_cost(300); 12119 12120 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 12121 ins_encode %{ 12122 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 12123 $tmp3$$Register, $result$$Register, $tmp4$$Register, 12124 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg); 12125 %} 12126 ins_pipe( pipe_slow ); 12127 %} 12128 12129 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12130 regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 12131 %{ 12132 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 12133 match(Set result (AryEq ary1 ary2)); 12134 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12135 //ins_cost(300); 12136 12137 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 12138 ins_encode %{ 12139 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 12140 $tmp3$$Register, $result$$Register, $tmp4$$Register, 12141 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister); 12142 %} 12143 ins_pipe( pipe_slow ); 12144 %} 12145 12146 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12147 regD tmp1, regD tmp2, eCXRegI 
tmp3, eBXRegI tmp4, eFlagsReg cr) 12148 %{ 12149 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 12150 match(Set result (AryEq ary1 ary2)); 12151 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12152 //ins_cost(300); 12153 12154 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 12155 ins_encode %{ 12156 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 12157 $tmp3$$Register, $result$$Register, $tmp4$$Register, 12158 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg); 12159 %} 12160 ins_pipe( pipe_slow ); 12161 %} 12162 12163 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12164 regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 12165 %{ 12166 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 12167 match(Set result (AryEq ary1 ary2)); 12168 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12169 //ins_cost(300); 12170 12171 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 12172 ins_encode %{ 12173 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 12174 $tmp3$$Register, $result$$Register, $tmp4$$Register, 12175 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister); 12176 %} 12177 ins_pipe( pipe_slow ); 12178 %} 12179 12180 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result, 12181 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) 12182 %{ 12183 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); 12184 match(Set result (CountPositives ary1 len)); 12185 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); 12186 12187 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} 12188 ins_encode %{ 
12189 __ count_positives($ary1$$Register, $len$$Register, 12190 $result$$Register, $tmp3$$Register, 12191 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg); 12192 %} 12193 ins_pipe( pipe_slow ); 12194 %} 12195 12196 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result, 12197 regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr) 12198 %{ 12199 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); 12200 match(Set result (CountPositives ary1 len)); 12201 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); 12202 12203 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} 12204 ins_encode %{ 12205 __ count_positives($ary1$$Register, $len$$Register, 12206 $result$$Register, $tmp3$$Register, 12207 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 12208 %} 12209 ins_pipe( pipe_slow ); 12210 %} 12211 12212 12213 // fast char[] to byte[] compression 12214 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, 12215 regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12216 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); 12217 match(Set result (StrCompressedCopy src (Binary dst len))); 12218 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12219 12220 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} 12221 ins_encode %{ 12222 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, 12223 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12224 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, 12225 knoreg, knoreg); 12226 %} 12227 ins_pipe( pipe_slow ); 12228 %} 12229 12230 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, 12231 regD tmp3, regD tmp4, kReg ktmp1, kReg 
ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12232 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); 12233 match(Set result (StrCompressedCopy src (Binary dst len))); 12234 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12235 12236 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} 12237 ins_encode %{ 12238 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, 12239 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12240 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, 12241 $ktmp1$$KRegister, $ktmp2$$KRegister); 12242 %} 12243 ins_pipe( pipe_slow ); 12244 %} 12245 12246 // fast byte[] to char[] inflation 12247 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 12248 regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{ 12249 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); 12250 match(Set dummy (StrInflatedCopy src (Binary dst len))); 12251 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 12252 12253 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 12254 ins_encode %{ 12255 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, 12256 $tmp1$$XMMRegister, $tmp2$$Register, knoreg); 12257 %} 12258 ins_pipe( pipe_slow ); 12259 %} 12260 12261 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 12262 regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{ 12263 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); 12264 match(Set dummy (StrInflatedCopy src (Binary dst len))); 12265 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 12266 12267 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 12268 ins_encode %{ 12269 __ byte_array_inflate($src$$Register, $dst$$Register, 
$len$$Register, 12270 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister); 12271 %} 12272 ins_pipe( pipe_slow ); 12273 %} 12274 12275 // encode char[] to byte[] in ISO_8859_1 12276 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, 12277 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 12278 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12279 predicate(!((EncodeISOArrayNode*)n)->is_ascii()); 12280 match(Set result (EncodeISOArray src (Binary dst len))); 12281 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12282 12283 format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 12284 ins_encode %{ 12285 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 12286 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12287 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false); 12288 %} 12289 ins_pipe( pipe_slow ); 12290 %} 12291 12292 // encode char[] to byte[] in ASCII 12293 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len, 12294 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 12295 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12296 predicate(((EncodeISOArrayNode*)n)->is_ascii()); 12297 match(Set result (EncodeISOArray src (Binary dst len))); 12298 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12299 12300 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 12301 ins_encode %{ 12302 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 12303 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12304 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true); 12305 %} 12306 ins_pipe( pipe_slow ); 12307 %} 12308 12309 //----------Control Flow Instructions------------------------------------------ 12310 // Signed compare Instructions 12311 
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ 12312 match(Set cr (CmpI op1 op2)); 12313 effect( DEF cr, USE op1, USE op2 ); 12314 format %{ "CMP $op1,$op2" %} 12315 opcode(0x3B); /* Opcode 3B /r */ 12316 ins_encode( OpcP, RegReg( op1, op2) ); 12317 ins_pipe( ialu_cr_reg_reg ); 12318 %} 12319 12320 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ 12321 match(Set cr (CmpI op1 op2)); 12322 effect( DEF cr, USE op1 ); 12323 format %{ "CMP $op1,$op2" %} 12324 opcode(0x81,0x07); /* Opcode 81 /7 */ 12325 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ 12326 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 12327 ins_pipe( ialu_cr_reg_imm ); 12328 %} 12329 12330 // Cisc-spilled version of cmpI_eReg 12331 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ 12332 match(Set cr (CmpI op1 (LoadI op2))); 12333 12334 format %{ "CMP $op1,$op2" %} 12335 ins_cost(500); 12336 opcode(0x3B); /* Opcode 3B /r */ 12337 ins_encode( OpcP, RegMem( op1, op2) ); 12338 ins_pipe( ialu_cr_reg_mem ); 12339 %} 12340 12341 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{ 12342 match(Set cr (CmpI src zero)); 12343 effect( DEF cr, USE src ); 12344 12345 format %{ "TEST $src,$src" %} 12346 opcode(0x85); 12347 ins_encode( OpcP, RegReg( src, src ) ); 12348 ins_pipe( ialu_cr_reg_imm ); 12349 %} 12350 12351 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{ 12352 match(Set cr (CmpI (AndI src con) zero)); 12353 12354 format %{ "TEST $src,$con" %} 12355 opcode(0xF7,0x00); 12356 ins_encode( OpcP, RegOpc(src), Con32(con) ); 12357 ins_pipe( ialu_cr_reg_imm ); 12358 %} 12359 12360 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{ 12361 match(Set cr (CmpI (AndI src mem) zero)); 12362 12363 format %{ "TEST $src,$mem" %} 12364 opcode(0x85); 12365 ins_encode( OpcP, RegMem( src, mem ) ); 12366 ins_pipe( ialu_cr_reg_mem ); 12367 %} 12368 12369 // Unsigned compare Instructions; really, same as signed except 
they 12370 // produce an eFlagsRegU instead of eFlagsReg. 12371 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{ 12372 match(Set cr (CmpU op1 op2)); 12373 12374 format %{ "CMPu $op1,$op2" %} 12375 opcode(0x3B); /* Opcode 3B /r */ 12376 ins_encode( OpcP, RegReg( op1, op2) ); 12377 ins_pipe( ialu_cr_reg_reg ); 12378 %} 12379 12380 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{ 12381 match(Set cr (CmpU op1 op2)); 12382 12383 format %{ "CMPu $op1,$op2" %} 12384 opcode(0x81,0x07); /* Opcode 81 /7 */ 12385 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 12386 ins_pipe( ialu_cr_reg_imm ); 12387 %} 12388 12389 // // Cisc-spilled version of cmpU_eReg 12390 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{ 12391 match(Set cr (CmpU op1 (LoadI op2))); 12392 12393 format %{ "CMPu $op1,$op2" %} 12394 ins_cost(500); 12395 opcode(0x3B); /* Opcode 3B /r */ 12396 ins_encode( OpcP, RegMem( op1, op2) ); 12397 ins_pipe( ialu_cr_reg_mem ); 12398 %} 12399 12400 // // Cisc-spilled version of cmpU_eReg 12401 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{ 12402 // match(Set cr (CmpU (LoadI op1) op2)); 12403 // 12404 // format %{ "CMPu $op1,$op2" %} 12405 // ins_cost(500); 12406 // opcode(0x39); /* Opcode 39 /r */ 12407 // ins_encode( OpcP, RegMem( op1, op2) ); 12408 //%} 12409 12410 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{ 12411 match(Set cr (CmpU src zero)); 12412 12413 format %{ "TESTu $src,$src" %} 12414 opcode(0x85); 12415 ins_encode( OpcP, RegReg( src, src ) ); 12416 ins_pipe( ialu_cr_reg_imm ); 12417 %} 12418 12419 // Unsigned pointer compare Instructions 12420 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{ 12421 match(Set cr (CmpP op1 op2)); 12422 12423 format %{ "CMPu $op1,$op2" %} 12424 opcode(0x3B); /* Opcode 3B /r */ 12425 ins_encode( OpcP, RegReg( op1, op2) ); 12426 ins_pipe( ialu_cr_reg_reg ); 12427 %} 12428 12429 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{ 
12430 match(Set cr (CmpP op1 op2)); 12431 12432 format %{ "CMPu $op1,$op2" %} 12433 opcode(0x81,0x07); /* Opcode 81 /7 */ 12434 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 12435 ins_pipe( ialu_cr_reg_imm ); 12436 %} 12437 12438 // // Cisc-spilled version of cmpP_eReg 12439 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{ 12440 match(Set cr (CmpP op1 (LoadP op2))); 12441 12442 format %{ "CMPu $op1,$op2" %} 12443 ins_cost(500); 12444 opcode(0x3B); /* Opcode 3B /r */ 12445 ins_encode( OpcP, RegMem( op1, op2) ); 12446 ins_pipe( ialu_cr_reg_mem ); 12447 %} 12448 12449 // // Cisc-spilled version of cmpP_eReg 12450 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{ 12451 // match(Set cr (CmpP (LoadP op1) op2)); 12452 // 12453 // format %{ "CMPu $op1,$op2" %} 12454 // ins_cost(500); 12455 // opcode(0x39); /* Opcode 39 /r */ 12456 // ins_encode( OpcP, RegMem( op1, op2) ); 12457 //%} 12458 12459 // Compare raw pointer (used in out-of-heap check). 12460 // Only works because non-oop pointers must be raw pointers 12461 // and raw pointers have no anti-dependencies. 12462 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{ 12463 predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none ); 12464 match(Set cr (CmpP op1 (LoadP op2))); 12465 12466 format %{ "CMPu $op1,$op2" %} 12467 opcode(0x3B); /* Opcode 3B /r */ 12468 ins_encode( OpcP, RegMem( op1, op2) ); 12469 ins_pipe( ialu_cr_reg_mem ); 12470 %} 12471 12472 // 12473 // This will generate a signed flags result. This should be ok 12474 // since any compare to a zero should be eq/neq. 12475 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{ 12476 match(Set cr (CmpP src zero)); 12477 12478 format %{ "TEST $src,$src" %} 12479 opcode(0x85); 12480 ins_encode( OpcP, RegReg( src, src ) ); 12481 ins_pipe( ialu_cr_reg_imm ); 12482 %} 12483 12484 // Cisc-spilled version of testP_reg 12485 // This will generate a signed flags result. 
This should be ok 12486 // since any compare to a zero should be eq/neq. 12487 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{ 12488 match(Set cr (CmpP (LoadP op) zero)); 12489 12490 format %{ "TEST $op,0xFFFFFFFF" %} 12491 ins_cost(500); 12492 opcode(0xF7); /* Opcode F7 /0 */ 12493 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) ); 12494 ins_pipe( ialu_cr_reg_imm ); 12495 %} 12496 12497 // Yanked all unsigned pointer compare operations. 12498 // Pointer compares are done with CmpP which is already unsigned. 12499 12500 //----------Max and Min-------------------------------------------------------- 12501 // Min Instructions 12502 //// 12503 // *** Min and Max using the conditional move are slower than the 12504 // *** branch version on a Pentium III. 12505 // // Conditional move for min 12506 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 12507 // effect( USE_DEF op2, USE op1, USE cr ); 12508 // format %{ "CMOVlt $op2,$op1\t! min" %} 12509 // opcode(0x4C,0x0F); 12510 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 12511 // ins_pipe( pipe_cmov_reg ); 12512 //%} 12513 // 12514 //// Min Register with Register (P6 version) 12515 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ 12516 // predicate(VM_Version::supports_cmov() ); 12517 // match(Set op2 (MinI op1 op2)); 12518 // ins_cost(200); 12519 // expand %{ 12520 // eFlagsReg cr; 12521 // compI_eReg(cr,op1,op2); 12522 // cmovI_reg_lt(op2,op1,cr); 12523 // %} 12524 //%} 12525 12526 // Min Register with Register (generic version) 12527 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 12528 match(Set dst (MinI dst src)); 12529 effect(KILL flags); 12530 ins_cost(300); 12531 12532 format %{ "MIN $dst,$src" %} 12533 opcode(0xCC); 12534 ins_encode( min_enc(dst,src) ); 12535 ins_pipe( pipe_slow ); 12536 %} 12537 12538 // Max Register with Register 12539 // *** Min and Max using the conditional move are slower than the 12540 // *** branch version on a Pentium III. 
12541 // // Conditional move for max 12542 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 12543 // effect( USE_DEF op2, USE op1, USE cr ); 12544 // format %{ "CMOVgt $op2,$op1\t! max" %} 12545 // opcode(0x4F,0x0F); 12546 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 12547 // ins_pipe( pipe_cmov_reg ); 12548 //%} 12549 // 12550 // // Max Register with Register (P6 version) 12551 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ 12552 // predicate(VM_Version::supports_cmov() ); 12553 // match(Set op2 (MaxI op1 op2)); 12554 // ins_cost(200); 12555 // expand %{ 12556 // eFlagsReg cr; 12557 // compI_eReg(cr,op1,op2); 12558 // cmovI_reg_gt(op2,op1,cr); 12559 // %} 12560 //%} 12561 12562 // Max Register with Register (generic version) 12563 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 12564 match(Set dst (MaxI dst src)); 12565 effect(KILL flags); 12566 ins_cost(300); 12567 12568 format %{ "MAX $dst,$src" %} 12569 opcode(0xCC); 12570 ins_encode( max_enc(dst,src) ); 12571 ins_pipe( pipe_slow ); 12572 %} 12573 12574 // ============================================================================ 12575 // Counted Loop limit node which represents exact final iterator value. 12576 // Note: the resulting value should fit into integer range since 12577 // counted loops have limit check on overflow. 12578 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{ 12579 match(Set limit (LoopLimit (Binary init limit) stride)); 12580 effect(TEMP limit_hi, TEMP tmp, KILL flags); 12581 ins_cost(300); 12582 12583 format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %} 12584 ins_encode %{ 12585 int strd = (int)$stride$$constant; 12586 assert(strd != 1 && strd != -1, "sanity"); 12587 int m1 = (strd > 0) ? 
1 : -1; 12588 // Convert limit to long (EAX:EDX) 12589 __ cdql(); 12590 // Convert init to long (init:tmp) 12591 __ movl($tmp$$Register, $init$$Register); 12592 __ sarl($tmp$$Register, 31); 12593 // $limit - $init 12594 __ subl($limit$$Register, $init$$Register); 12595 __ sbbl($limit_hi$$Register, $tmp$$Register); 12596 // + ($stride - 1) 12597 if (strd > 0) { 12598 __ addl($limit$$Register, (strd - 1)); 12599 __ adcl($limit_hi$$Register, 0); 12600 __ movl($tmp$$Register, strd); 12601 } else { 12602 __ addl($limit$$Register, (strd + 1)); 12603 __ adcl($limit_hi$$Register, -1); 12604 __ lneg($limit_hi$$Register, $limit$$Register); 12605 __ movl($tmp$$Register, -strd); 12606 } 12607 // signed division: (EAX:EDX) / pos_stride 12608 __ idivl($tmp$$Register); 12609 if (strd < 0) { 12610 // restore sign 12611 __ negl($tmp$$Register); 12612 } 12613 // (EAX) * stride 12614 __ mull($tmp$$Register); 12615 // + init (ignore upper bits) 12616 __ addl($limit$$Register, $init$$Register); 12617 %} 12618 ins_pipe( pipe_slow ); 12619 %} 12620 12621 // ============================================================================ 12622 // Branch Instructions 12623 // Jump Table 12624 instruct jumpXtnd(rRegI switch_val) %{ 12625 match(Jump switch_val); 12626 ins_cost(350); 12627 format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %} 12628 ins_encode %{ 12629 // Jump to Address(table_base + switch_reg) 12630 Address index(noreg, $switch_val$$Register, Address::times_1); 12631 __ jump(ArrayAddress($constantaddress, index), noreg); 12632 %} 12633 ins_pipe(pipe_jmp); 12634 %} 12635 12636 // Jump Direct - Label defines a relative address from JMP+1 12637 instruct jmpDir(label labl) %{ 12638 match(Goto); 12639 effect(USE labl); 12640 12641 ins_cost(300); 12642 format %{ "JMP $labl" %} 12643 size(5); 12644 ins_encode %{ 12645 Label* L = $labl$$label; 12646 __ jmp(*L, false); // Always long jump 12647 %} 12648 ins_pipe( pipe_jmp ); 12649 %} 12650 12651 // Jump Direct Conditional - Label 
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// size(6): 2-byte 0x0F 8x opcode + 4-byte displacement (long form forced).
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Signed-compare back branch of a counted loop.
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Unsigned-compare variant of the loop-end branch.
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Loop-end branch on carry-flag-only (UCF) unsigned compares; cheaper
// ins_cost(200) so it is preferred when the flags allow it.
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Conditional branch on carry-flag-only (UCF) flags; cheaper cost than the
// generic unsigned form above.
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Conditional branch for float EQ/NE compares where the parity flag must be
// consulted (unordered result).  NE branches on parity OR not-equal; EQ must
// first skip to 'done' on parity so an unordered compare does NOT take the
// equal branch.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
12776 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{ 12777 match(Set result (PartialSubtypeCheck sub super)); 12778 effect( KILL rcx, KILL cr ); 12779 12780 ins_cost(1100); // slightly larger than the next version 12781 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12782 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12783 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12784 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12785 "JNE,s miss\t\t# Missed: EDI not-zero\n\t" 12786 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t" 12787 "XOR $result,$result\t\t Hit: EDI zero\n\t" 12788 "miss:\t" %} 12789 12790 opcode(0x1); // Force a XOR of EDI 12791 ins_encode( enc_PartialSubtypeCheck() ); 12792 ins_pipe( pipe_slow ); 12793 %} 12794 12795 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{ 12796 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); 12797 effect( KILL rcx, KILL result ); 12798 12799 ins_cost(1000); 12800 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12801 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12802 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12803 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12804 "JNE,s miss\t\t# Missed: flags NZ\n\t" 12805 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t" 12806 "miss:\t" %} 12807 12808 opcode(0x0); // No need to XOR EDI 12809 ins_encode( enc_PartialSubtypeCheck() ); 12810 ins_pipe( pipe_slow ); 12811 %} 12812 12813 // ============================================================================ 12814 // Branch Instructions -- short offset versions 12815 // 12816 // These instructions are used to replace jumps of a long offset (the default 12817 
// These short-offset branches replace the default long-offset matches.  They
// are all tagged with the ins_short_branch attribute, which causes the ADLC
// to suppress the match rules in general matching.  Instead, the ADLC
// generates a conversion method in the MachNode which can be used to do
// in-place replacement of the long variant with the shorter variant.  The
// compiler will determine if a branch can be taken by the
// is_short_branch_offset() predicate in the machine specific code section of
// the file.

// Jump Direct - Label defines a relative address from JMP+1
// size(2): 1-byte opcode + 1-byte displacement.
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of the signed counted-loop back branch.
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form, unsigned compare.
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short form, carry-flag-only (UCF) unsigned compare.
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short form, carry-flag-only (UCF) conditional branch.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short form of the float EQ/NE branch that must also consult the parity
// flag (unordered result).  size(4): two 2-byte short jumps.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
12991 // This is the test to avoid. 12992 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{ 12993 match(Set dst (CmpL3 src1 src2)); 12994 effect( KILL flags ); 12995 ins_cost(1000); 12996 format %{ "XOR $dst,$dst\n\t" 12997 "CMP $src1.hi,$src2.hi\n\t" 12998 "JLT,s m_one\n\t" 12999 "JGT,s p_one\n\t" 13000 "CMP $src1.lo,$src2.lo\n\t" 13001 "JB,s m_one\n\t" 13002 "JEQ,s done\n" 13003 "p_one:\tINC $dst\n\t" 13004 "JMP,s done\n" 13005 "m_one:\tDEC $dst\n" 13006 "done:" %} 13007 ins_encode %{ 13008 Label p_one, m_one, done; 13009 __ xorptr($dst$$Register, $dst$$Register); 13010 __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register)); 13011 __ jccb(Assembler::less, m_one); 13012 __ jccb(Assembler::greater, p_one); 13013 __ cmpl($src1$$Register, $src2$$Register); 13014 __ jccb(Assembler::below, m_one); 13015 __ jccb(Assembler::equal, done); 13016 __ bind(p_one); 13017 __ incrementl($dst$$Register); 13018 __ jmpb(done); 13019 __ bind(m_one); 13020 __ decrementl($dst$$Register); 13021 __ bind(done); 13022 %} 13023 ins_pipe( pipe_slow ); 13024 %} 13025 13026 //====== 13027 // Manifest a CmpL result in the normal flags. Only good for LT or GE 13028 // compares. Can be used for LE or GT compares by reversing arguments. 13029 // NOT GOOD FOR EQ/NE tests. 13030 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{ 13031 match( Set flags (CmpL src zero )); 13032 ins_cost(100); 13033 format %{ "TEST $src.hi,$src.hi" %} 13034 opcode(0x85); 13035 ins_encode( OpcP, RegReg_Hi2( src, src ) ); 13036 ins_pipe( ialu_cr_reg_reg ); 13037 %} 13038 13039 // Manifest a CmpL result in the normal flags. Only good for LT or GE 13040 // compares. Can be used for LE or GT compares by reversing arguments. 13041 // NOT GOOD FOR EQ/NE tests. 
// Reg-reg LT/GE long compare: CMP of the low halves followed by SBB of the
// high halves leaves the sign/overflow flags as if the full 64-bit subtract
// had been done (a temp is needed because SBB destroys its destination).
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Two CMOVs are needed because the long lives in a register pair.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above but the source long is loaded from memory.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags wrappers: expand to the signed-flag forms above.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags wrappers for the int CMOVs.
instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
// Unsigned-flags wrapper: expands to the signed-flag pointer CMOV above.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LTGE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 register form).
// NOTE: the BoolTest disjunction is parenthesized so that the UseSSE guard
// applies to BOTH arms -- the previous 'UseSSE<=1 && lt || ge' parsed as
// '(UseSSE<=1 && lt) || ge' due to && binding tighter than ||, which let the
// ge arm bypass the UseSSE check (inconsistent with every sibling predicate).
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM form, SSE2+).
// Parenthesization fixed as above.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 register form).
// Parenthesization fixed as above.
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM form, SSE1+).
// Parenthesization fixed as above.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// A long is zero iff the OR of its two halves is zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Compare the low halves; only if they are equal compare the high halves --
// the first inequality already settles EQ/NE.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
    "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
13278 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{ 13279 match(Set flags (CmpUL src zero)); 13280 effect(TEMP tmp); 13281 ins_cost(200); 13282 format %{ "MOV $tmp,$src.lo\n\t" 13283 "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %} 13284 ins_encode(long_cmp_flags0(src, tmp)); 13285 ins_pipe(ialu_reg_reg_long); 13286 %} 13287 13288 // Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares. 13289 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{ 13290 match(Set flags (CmpUL src1 src2)); 13291 ins_cost(200+300); 13292 format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t" 13293 "JNE,s skip\n\t" 13294 "CMP $src1.hi,$src2.hi\n\t" 13295 "skip:\t" %} 13296 ins_encode(long_cmp_flags1(src1, src2)); 13297 ins_pipe(ialu_cr_reg_reg); 13298 %} 13299 13300 // Unsigned long compare reg == zero/reg OR reg != zero/reg 13301 // Just a wrapper for a normal branch, plus the predicate test. 13302 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{ 13303 match(If cmp flags); 13304 effect(USE labl); 13305 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne); 13306 expand %{ 13307 jmpCon(cmp, flags, labl); // JEQ or JNE... 13308 %} 13309 %} 13310 13311 // Compare 2 longs and CMOVE longs. 
13312 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{ 13313 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 13314 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13315 ins_cost(400); 13316 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13317 "CMOV$cmp $dst.hi,$src.hi" %} 13318 opcode(0x0F,0x40); 13319 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 13320 ins_pipe( pipe_cmov_reg_long ); 13321 %} 13322 13323 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{ 13324 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 13325 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13326 ins_cost(500); 13327 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13328 "CMOV$cmp $dst.hi,$src.hi" %} 13329 opcode(0x0F,0x40); 13330 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 13331 ins_pipe( pipe_cmov_reg_long ); 13332 %} 13333 13334 // Compare 2 longs and CMOVE ints. 
// Conditional move of an int on EQ/NE long-compare flags.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags wrappers: expand to the signed-flag forms above.
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_EQNE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_EQNE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_EQNE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 register form).
// NOTE: the BoolTest disjunction is parenthesized so that the UseSSE guard
// applies to BOTH arms -- the previous 'UseSSE<=1 && eq || ne' parsed as
// '(UseSSE<=1 && eq) || ne' due to && binding tighter than ||, which let the
// ne arm bypass the UseSSE check (inconsistent with every sibling predicate).
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM form, SSE2+).
// Parenthesization fixed as above.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 register form).
// Parenthesization fixed as above.
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM form, SSE1+).
// Parenthesization fixed as above.
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 unsigned longs and CMOVE longs (delegates to the signed-flags rule,
// which emits the same CMOVcc pair; only the commuted condition code differs).
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
// Only selected for LE/GT tests (predicate), whose flags were produced by the
// cmpL_*_LEGT rules with swapped operands; cmpOp_commute emits the commuted cc.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above but with a memory source operand (CMOVcc r32, m32).
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned variants delegate to the register/memory rules above.
instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LEGT(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): parenthesized the le/gt disjunction below so the UseSSE guard
// applies to both arms. Previously `UseSSE<=1 && le || gt` parsed as
// `(UseSSE<=1 && le) || gt` because && binds tighter than ||, letting this
// x87 rule match on a GT test even when SSE2 is enabled. The sibling
// cmovPP_reg_LEGT rules above show the intended parenthesized form.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  // Parenthesized (see note above): UseSSE>=2 must guard both le and gt.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  // Parenthesized (see note above): UseSSE==0 must guard both le and gt.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  // Parenthesized (see note above): UseSSE>=1 must guard both le and gt.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
13676 instruct CallDynamicJavaDirect(method meth) %{ 13677 match(CallDynamicJava); 13678 effect(USE meth); 13679 13680 ins_cost(300); 13681 format %{ "MOV EAX,(oop)-1\n\t" 13682 "CALL,dynamic" %} 13683 opcode(0xE8); /* E8 cd */ 13684 ins_encode( pre_call_resets, 13685 Java_Dynamic_Call( meth ), 13686 call_epilog, 13687 post_call_FPU ); 13688 ins_pipe( pipe_slow ); 13689 ins_alignment(4); 13690 %} 13691 13692 // Call Runtime Instruction 13693 instruct CallRuntimeDirect(method meth) %{ 13694 match(CallRuntime ); 13695 effect(USE meth); 13696 13697 ins_cost(300); 13698 format %{ "CALL,runtime " %} 13699 opcode(0xE8); /* E8 cd */ 13700 // Use FFREEs to clear entries in float stack 13701 ins_encode( pre_call_resets, 13702 FFree_Float_Stack_All, 13703 Java_To_Runtime( meth ), 13704 post_call_FPU ); 13705 ins_pipe( pipe_slow ); 13706 %} 13707 13708 // Call runtime without safepoint 13709 instruct CallLeafDirect(method meth) %{ 13710 match(CallLeaf); 13711 effect(USE meth); 13712 13713 ins_cost(300); 13714 format %{ "CALL_LEAF,runtime " %} 13715 opcode(0xE8); /* E8 cd */ 13716 ins_encode( pre_call_resets, 13717 FFree_Float_Stack_All, 13718 Java_To_Runtime( meth ), 13719 Verify_FPU_For_Leaf, post_call_FPU ); 13720 ins_pipe( pipe_slow ); 13721 %} 13722 13723 instruct CallLeafNoFPDirect(method meth) %{ 13724 match(CallLeafNoFP); 13725 effect(USE meth); 13726 13727 ins_cost(300); 13728 format %{ "CALL_LEAF_NOFP,runtime " %} 13729 opcode(0xE8); /* E8 cd */ 13730 ins_encode(pre_call_resets, Java_To_Runtime(meth)); 13731 ins_pipe( pipe_slow ); 13732 %} 13733 13734 13735 // Return Instruction 13736 // Remove the return address & jump to it. 13737 instruct Ret() %{ 13738 match(Return); 13739 format %{ "RET" %} 13740 opcode(0xC3); 13741 ins_encode(OpcP); 13742 ins_pipe( pipe_jmp ); 13743 %} 13744 13745 // Tail Call; Jump from runtime stub to Java code. 13746 // Also known as an 'interprocedural jump'. 13747 // Target of jump will eventually return to caller. 
13748 // TailJump below removes the return address. 13749 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{ 13750 match(TailCall jump_target method_ptr); 13751 ins_cost(300); 13752 format %{ "JMP $jump_target \t# EBX holds method" %} 13753 opcode(0xFF, 0x4); /* Opcode FF /4 */ 13754 ins_encode( OpcP, RegOpc(jump_target) ); 13755 ins_pipe( pipe_jmp ); 13756 %} 13757 13758 13759 // Tail Jump; remove the return address; jump to target. 13760 // TailCall above leaves the return address around. 13761 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{ 13762 match( TailJump jump_target ex_oop ); 13763 ins_cost(300); 13764 format %{ "POP EDX\t# pop return address into dummy\n\t" 13765 "JMP $jump_target " %} 13766 opcode(0xFF, 0x4); /* Opcode FF /4 */ 13767 ins_encode( enc_pop_rdx, 13768 OpcP, RegOpc(jump_target) ); 13769 ins_pipe( pipe_jmp ); 13770 %} 13771 13772 // Create exception oop: created by stack-crawling runtime code. 13773 // Created exception is now available to this handler, and is setup 13774 // just prior to jumping to this handler. No code emitted. 13775 instruct CreateException( eAXRegP ex_oop ) 13776 %{ 13777 match(Set ex_oop (CreateEx)); 13778 13779 size(0); 13780 // use the following format syntax 13781 format %{ "# exception oop is in EAX; no code emitted" %} 13782 ins_encode(); 13783 ins_pipe( empty ); 13784 %} 13785 13786 13787 // Rethrow exception: 13788 // The exception oop will come in the first argument position. 13789 // Then JUMP (not call) to the rethrow stub code. 
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}

instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  // NOTE(review): format string previously said "mask_all_evexL_LE32", which
  // did not match this rule's name; aligned it with the rule name so debug
  // output identifies the rule that actually matched.
  format %{ "mask_all_evexL_LT32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Safepoint Instruction
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();  // NOTE(review): currently unused here — verify whether a size check was intended
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...]
//  );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Active peephole: a load of a value that was just stored to the same memory
// location is replaced by re-issuing the store (the loaded value is already
// in 1.src), eliminating the redundant load.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.