//
// Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS = No-Save: The register allocator assumes that these registers
//               can be used without saving upon entry to the method, &
//               that they do not need to be saved at call sites.
//
// SOC = Save-On-Call: The register allocator assumes that these registers
//                     can be used without saving upon entry to the method,
//                     but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS = Always-Save: The register allocator assumes that these registers
//                   must be saved before using them upon entry to the
//                   method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.
// The 4th field is the hardware encoding (EAX=0, ECX=1, EDX=2, EBX=3, ...)
// that goes straight into ModRM reg/rm fields.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
// Each FPR is described as an L/H pair of 32-bit halves so that a
// double can occupy two adjacent slots (FPRnL = low word, FPRnH = high word).
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
// Remaining alignment-filler float slots; never allocated (VMRegImpl::Bad()).
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
// (The first argument of reg_class_dynamic is chosen when the %{ %} predicate is true.)
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
reg_class ebpd_reg( EBP,EDI );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
#define HIGH_FROM_LOW_ENC(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Nothing to initialize on x86_32; other ports fill in dynamic masks here.
void reg_mask_init() {}

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Rounds 'adr' down to the nearest 16-byte boundary and stores the
// 128-bit value {lo, hi} there; returns the aligned address. The caller
// must supply a buffer with at least 16 bytes of slack (see fp_signmask_pool).
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pointer refers to a 16-byte-aligned 128-bit constant inside
// fp_signmask_pool:
//   *_signmask_pool — AND masks that clear the sign bit(s) (AbsF/AbsD)
//   *_signflip_pool — XOR masks that flip the sign bit(s)  (NegF/NegD)
static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted just before a call instruction by the pre-call
// reset sequence: an optional 6-byte FLDCW (24-bit FP mode) plus an
// optional 3-byte vzeroupper.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Byte size of the FFree-Float-Stack-All sequence emitted before runtime
// calls; stays -1 until that code has been emitted (see assert below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1; // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5; // skip MOV instruction
  current_offset += 1; // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModRM byte built from the mod (f1), reg (f2) and r/m (f3) fields.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with a condition code OR-ed into its low bits.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}
// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Any non-null, non-placeholder value embedded with an oop relocation
  // must actually be a valid oop.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits an [ESP+disp] operand (ModRM + SIB + displacement) after the given
// opcode, using the 8-bit displacement form when disp fits in a byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);   // SIB byte
    emit_d8 (cbuf, disp); // Displacement     // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);   // SIB byte
    emit_d32(cbuf, disp); // Displacement     // R/M byte
  }
}

// rRegI ereg, memory mem) %{ // emit_reg_mem
// General x86 memory-operand encoder: emits the ModRM byte (with
// reg_encoding in the reg field), an optional SIB byte, and an optional
// 8/32-bit displacement (relocated when disp_reloc != relocInfo::none).
// index == 0x4 means "no index register"; base == -1 flags an absolute
// 32-bit address (disp32-only form).
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else { // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else { // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) { // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}


// Emit a 32-bit register-to-register MOV (0x8B /r); a self-move emits nothing.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// After a comiss/ucomiss, rewrite EFLAGS so that an unordered (NaN)
// comparison reads as 'less than'; see the bit table below.
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in dst:
// -1 (below or unordered), 0 (equal), 1 (above).
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0; // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog for -XX:+PrintOptoAssembly; must mirror the code
// that MachPrologNode::emit() produces.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int
      bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  // verified_entry() emits the whole prolog: optional stack bang, EBP save,
  // frame allocation and, when needed, the 24-bit FPU control word load.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog for -XX:+PrintOptoAssembly; must mirror the code
// that MachEpilogNode::emit() produces.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL rsp, poll_offset[thread] \n\t"
              "JA #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  // ADD ESP, #framesize: 0x81 takes an imm32, 0x83 a sign-extended imm8.
  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    MacroAssembler masm(&cbuf);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Real emission (not sizing): register a safepoint-poll stub as the
      // slow path for this return poll.
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//=============================================================================

// Register classes used to dispatch spill-copy emission: general purpose,
// AVX-512 opmask, x87 float (UseSSE < 2 only), XMM, or a stack slot.
enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
// Map an allocator register name to its RC class.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  if (r->is_KRegister()) return rc_kreg;
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (cbuf != NULL) or print (cbuf == NULL, !do_size) one [ESP+offset]
// memory-form instruction and return the accumulated byte size:
// opcode + ModRM + SIB (3 bytes) plus 0/1/4 displacement bytes.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  // 0, 1 or 4 displacement bytes depending on the offset magnitude.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers. Extra opcode bits, limited syntax.
// Spill/reload a float (single slot) or double (adjacent pair) between an
// XMM register and [ESP+offset]; returns the accumulated byte size, with
// EVEX compressed-displacement accounting when UseAVX > 2.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // EVEX disp8*N compression can shrink a non-zero displacement to 1 byte.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy (float or adjacent-pair double); returns the
// accumulated byte size for the chosen SSE/VEX/EVEX encoding.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// 32-bit GPR -> XMM copy via MOVD; returns total instruction bytes
// (EVEX form is two bytes longer).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// 32-bit XMM -> GPR copy via MOVD; mirror image of impl_movgpr2x_helper.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer reg-reg copy: MOV r32,r/m32 (0x8B) — always 2 bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 register to [ESP+offset].  A value not already at FPR1L is
// first FLD'd to the top of stack and stored with the popping FSTP form;
// FPR1L itself is stored with non-popping FST.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // st_op selects the ModRM /digit via the impl_helper reg argument:
  // store-and-pop (FSTP) vs. store-no-pop (FST).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st);

void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
                      int stack_offset, int reg, uint ireg, outputStream* st);

// Copy a vector value between two stack slots.  VecS/VecD go through
// push/pop pairs; wider vectors bounce through xmm0, which is saved to and
// restored from memory below ESP around the copy.
static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    switch (ireg) {
    case Op_VecS:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX:
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY:
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      // NOTE(review): the emitted instructions above are 32-bit pushl/popl
      // pairs, but this format string prints "popq" — confirm the mnemonic.
      st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushl [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu xmm0, [rsp + #%d]\n\t"
                "movdqu [rsp + #%d], xmm0\n\t"
                "movdqu xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}

// Shared worker for MachSpillCopyNode::format/emit/size: with a CodeBuffer
// it emits the spill copy, with cbuf == NULL and !do_size it prints the
// assembly text, and the return value accumulates the emitted byte size for
// the scalar cases (vector cases return 0 and are sized elsewhere).
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size; // Self copy, no move

  // Vector spill copies (excluding vector-mask values) are handled entirely
  // by the shared x86.ad helpers.
  if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }

  // --------------------------------------
  // Check for mem-mem move. push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      // Move the high word first so it is not clobbered by the low-word copy.
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( src_first_rc == rc_stack && dst_first_rc == rc_int )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode (*cbuf, 0xD9 ); // FLD ST(i)
        emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
        emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode (*cbuf, 0xDD ); // FST ST(i)
        emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    // FLD+FSTP is two 2-byte instructions; plain FST is one.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
      emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    // FLD mem (3 + disp bytes) followed by FSTP ST(i) (2 bytes).
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    // Store the x87 value into the scratch slot just allocated ...
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // AVX-512 opmask specific spilling.
  if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    int offset = ra_->reg2offset(src_first);
    if (cbuf != nullptr) {
      MacroAssembler _masm(cbuf);
      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
    } else {
      st->print("KMOV %s, [ESP + %d]", Matcher::regName[dst_first], offset);
#endif
    }
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    int offset = ra_->reg2offset(dst_first);
    if (cbuf != nullptr) {
      MacroAssembler _masm(cbuf);
      __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else {
      st->print("KMOV [ESP + %d], %s", offset, Matcher::regName[src_first]);
#endif
    }
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    if (cbuf != nullptr) {
      MacroAssembler _masm(cbuf);
      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else {
      st->print("KMOV %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
#endif
    }
    return 0;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size; // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Materialize the address of the lock box: LEA reg,[ESP+offset].
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  // LEA with 32-bit (mod=2) or 8-bit (mod=1) displacement; sizes here must
  // match BoxLockNode::size() below.
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// 7 bytes for the 32-bit displacement form, 4 for the 8-bit form; must stay
// in sync with BoxLockNode::emit() above.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Unverified entry point: inline-cache check (expected klass in EAX,
// receiver in ECX) followed by padding NOPs.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// Fixed size, checked by the assert in emit() above.
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

// Vector calling convention not supported.
const bool Matcher::supports_vector_calling_convention() {
  return false;
}

OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  Unimplemented();
  return OptoRegPair(0, 0);
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
// this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On 86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

// Register-pressure limits for the allocator; the -1 flag value selects the
// platform default of 6.
uint Matcher::int_pressure_limit()
{
  return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
}

uint Matcher::float_pressure_limit()
{
  return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.  MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Operand-size override prefix (16-bit operand size)
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( cbuf, 0x0 ); // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    // normal case special case
    //
    // input : rax,: dividend min_int
    // reg: divisor -1
    //
    // output: rax,: quotient (= rax, idiv reg) min_int
    // rdx: remainder (= rax, irem reg) 0
    //
    // Code sequnce:
    //
    // 81 F8 00 00 00 80 cmp rax,80000000h
    // 0F 85 0B 00 00 00 jne normal_case
    // 33 D2 xor rdx,edx
    // 83 F9 FF cmp rcx,0FFh
    // 0F 84 03 00 00 00 je done
    // normal_case:
    // 99 cdq
    // F7 F9 idiv rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
    // normal_case:
    emit_opcode(cbuf,0x99); // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1636 // Check for 8-bit immediate, and set sign extend bit in opcode 1637 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1638 emit_opcode(cbuf, $primary | 0x02); } 1639 else { // If 32-bit immediate 1640 emit_opcode(cbuf, $primary); 1641 } 1642 // Emit r/m byte with secondary opcode, after primary opcode. 1643 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1644 %} 1645 1646 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1647 // Check for 8-bit immediate, and set sign extend bit in opcode 1648 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1649 $$$emit8$imm$$constant; 1650 } 1651 else { // If 32-bit immediate 1652 // Output immediate 1653 $$$emit32$imm$$constant; 1654 } 1655 %} 1656 1657 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1658 // Emit primary opcode and set sign-extend bit 1659 // Check for 8-bit immediate, and set sign extend bit in opcode 1660 int con = (int)$imm$$constant; // Throw away top bits 1661 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1662 // Emit r/m byte with secondary opcode, after primary opcode. 1663 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1664 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1665 else emit_d32(cbuf,con); 1666 %} 1667 1668 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1669 // Emit primary opcode and set sign-extend bit 1670 // Check for 8-bit immediate, and set sign extend bit in opcode 1671 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1672 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1673 // Emit r/m byte with tertiary opcode, after primary opcode. 
1674 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg)); 1675 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1676 else emit_d32(cbuf,con); 1677 %} 1678 1679 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1680 emit_cc(cbuf, $secondary, $dst$$reg ); 1681 %} 1682 1683 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1684 int destlo = $dst$$reg; 1685 int desthi = HIGH_FROM_LOW_ENC(destlo); 1686 // bswap lo 1687 emit_opcode(cbuf, 0x0F); 1688 emit_cc(cbuf, 0xC8, destlo); 1689 // bswap hi 1690 emit_opcode(cbuf, 0x0F); 1691 emit_cc(cbuf, 0xC8, desthi); 1692 // xchg lo and hi 1693 emit_opcode(cbuf, 0x87); 1694 emit_rm(cbuf, 0x3, destlo, desthi); 1695 %} 1696 1697 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1698 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1699 %} 1700 1701 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1702 $$$emit8$primary; 1703 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1704 %} 1705 1706 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1707 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1708 emit_d8(cbuf, op >> 8 ); 1709 emit_d8(cbuf, op & 255); 1710 %} 1711 1712 // emulate a CMOV with a conditional branch around a MOV 1713 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1714 // Invert sense of branch from sense of CMOV 1715 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1716 emit_d8( cbuf, $brOffs$$constant ); 1717 %} 1718 1719 enc_class enc_PartialSubtypeCheck( ) %{ 1720 Register Redi = as_Register(EDI_enc); // result register 1721 Register Reax = as_Register(EAX_enc); // super class 1722 Register Recx = as_Register(ECX_enc); // killed 1723 Register Resi = as_Register(ESI_enc); // sub class 1724 Label miss; 1725 1726 MacroAssembler _masm(&cbuf); 1727 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1728 NULL, &miss, 1729 /*set_cond_codes:*/ true); 1730 if ($primary) { 1731 __ xorptr(Redi, Redi); 1732 } 1733 __ bind(miss); 1734 %} 1735 1736 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 
1737 MacroAssembler masm(&cbuf); 1738 int start = masm.offset(); 1739 if (UseSSE >= 2) { 1740 if (VerifyFPU) { 1741 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1742 } 1743 } else { 1744 // External c_calling_convention expects the FPU stack to be 'clean'. 1745 // Compiled code leaves it dirty. Do cleanup now. 1746 masm.empty_FPU_stack(); 1747 } 1748 if (sizeof_FFree_Float_Stack_All == -1) { 1749 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1750 } else { 1751 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1752 } 1753 %} 1754 1755 enc_class Verify_FPU_For_Leaf %{ 1756 if( VerifyFPU ) { 1757 MacroAssembler masm(&cbuf); 1758 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1759 } 1760 %} 1761 1762 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1763 // This is the instruction starting address for relocation info. 1764 MacroAssembler _masm(&cbuf); 1765 cbuf.set_insts_mark(); 1766 $$$emit8$primary; 1767 // CALL directly to the runtime 1768 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1769 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1770 __ post_call_nop(); 1771 1772 if (UseSSE >= 2) { 1773 MacroAssembler _masm(&cbuf); 1774 BasicType rt = tf()->return_type(); 1775 1776 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1777 // A C runtime call where the return value is unused. In SSE2+ 1778 // mode the result needs to be removed from the FPU stack. It's 1779 // likely that this function call could be removed by the 1780 // optimizer if the C function is a pure function. 
1781 __ ffree(0); 1782 } else if (rt == T_FLOAT) { 1783 __ lea(rsp, Address(rsp, -4)); 1784 __ fstp_s(Address(rsp, 0)); 1785 __ movflt(xmm0, Address(rsp, 0)); 1786 __ lea(rsp, Address(rsp, 4)); 1787 } else if (rt == T_DOUBLE) { 1788 __ lea(rsp, Address(rsp, -8)); 1789 __ fstp_d(Address(rsp, 0)); 1790 __ movdbl(xmm0, Address(rsp, 0)); 1791 __ lea(rsp, Address(rsp, 8)); 1792 } 1793 } 1794 %} 1795 1796 enc_class pre_call_resets %{ 1797 // If method sets FPU control word restore it here 1798 debug_only(int off0 = cbuf.insts_size()); 1799 if (ra_->C->in_24_bit_fp_mode()) { 1800 MacroAssembler _masm(&cbuf); 1801 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 1802 } 1803 // Clear upper bits of YMM registers when current compiled code uses 1804 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1805 MacroAssembler _masm(&cbuf); 1806 __ vzeroupper(); 1807 debug_only(int off1 = cbuf.insts_size()); 1808 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1809 %} 1810 1811 enc_class post_call_FPU %{ 1812 // If method sets FPU control word do it here also 1813 if (Compile::current()->in_24_bit_fp_mode()) { 1814 MacroAssembler masm(&cbuf); 1815 masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 1816 } 1817 %} 1818 1819 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1820 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1821 // who we intended to call. 1822 MacroAssembler _masm(&cbuf); 1823 cbuf.set_insts_mark(); 1824 $$$emit8$primary; 1825 1826 if (!_method) { 1827 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1828 runtime_call_Relocation::spec(), 1829 RELOC_IMM32); 1830 __ post_call_nop(); 1831 } else { 1832 int method_index = resolved_method_index(cbuf); 1833 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1834 : static_call_Relocation::spec(method_index); 1835 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1836 rspec, RELOC_DISP32); 1837 __ post_call_nop(); 1838 address mark = cbuf.insts_mark(); 1839 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) { 1840 // Calls of the same statically bound method can share 1841 // a stub to the interpreter. 1842 cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off()); 1843 } else { 1844 // Emit stubs for static call. 1845 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark); 1846 if (stub == NULL) { 1847 ciEnv::current()->record_failure("CodeCache is full"); 1848 return; 1849 } 1850 } 1851 } 1852 %} 1853 1854 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1855 MacroAssembler _masm(&cbuf); 1856 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1857 __ post_call_nop(); 1858 %} 1859 1860 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1861 int disp = in_bytes(Method::from_compiled_offset()); 1862 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1863 1864 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1865 MacroAssembler _masm(&cbuf); 1866 cbuf.set_insts_mark(); 1867 $$$emit8$primary; 1868 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1869 emit_d8(cbuf, disp); // Displacement 1870 __ post_call_nop(); 1871 %} 1872 1873 // Following encoding is no longer used, but may be restored if calling 1874 // convention changes significantly. 
1875 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1876 // 1877 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1878 // // int ic_reg = Matcher::inline_cache_reg(); 1879 // // int ic_encode = Matcher::_regEncode[ic_reg]; 1880 // // int imo_reg = Matcher::interpreter_method_reg(); 1881 // // int imo_encode = Matcher::_regEncode[imo_reg]; 1882 // 1883 // // // Interpreter expects method_ptr in EBX, currently a callee-saved register, 1884 // // // so we load it immediately before the call 1885 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_ptr 1886 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1887 // 1888 // // xor rbp,ebp 1889 // emit_opcode(cbuf, 0x33); 1890 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1891 // 1892 // // CALL to interpreter. 1893 // cbuf.set_insts_mark(); 1894 // $$$emit8$primary; 1895 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1896 // runtime_call_Relocation::spec(), RELOC_IMM32 ); 1897 // %} 1898 1899 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1900 $$$emit8$primary; 1901 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1902 $$$emit8$shift$$constant; 1903 %} 1904 1905 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1906 // Load immediate does not have a zero or sign extended version 1907 // for 8-bit immediates 1908 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1909 $$$emit32$src$$constant; 1910 %} 1911 1912 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1913 // Load immediate does not have a zero or sign extended version 1914 // for 8-bit immediates 1915 emit_opcode(cbuf, $primary + $dst$$reg); 1916 $$$emit32$src$$constant; 1917 %} 1918 1919 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1920 // Load immediate does not have a zero or sign extended version 1921 // for 8-bit immediates 1922 int dst_enc = $dst$$reg; 1923 int src_con = $src$$constant & 0x0FFFFFFFFL; 1924 if (src_con == 0) { 1925 // xor dst, dst 1926 
emit_opcode(cbuf, 0x33); 1927 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1928 } else { 1929 emit_opcode(cbuf, $primary + dst_enc); 1930 emit_d32(cbuf, src_con); 1931 } 1932 %} 1933 1934 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 1935 // Load immediate does not have a zero or sign extended version 1936 // for 8-bit immediates 1937 int dst_enc = $dst$$reg + 2; 1938 int src_con = ((julong)($src$$constant)) >> 32; 1939 if (src_con == 0) { 1940 // xor dst, dst 1941 emit_opcode(cbuf, 0x33); 1942 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1943 } else { 1944 emit_opcode(cbuf, $primary + dst_enc); 1945 emit_d32(cbuf, src_con); 1946 } 1947 %} 1948 1949 1950 // Encode a reg-reg copy. If it is useless, then empty encoding. 1951 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 1952 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 1953 %} 1954 1955 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 1956 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 1957 %} 1958 1959 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 1960 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1961 %} 1962 1963 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 1964 $$$emit8$primary; 1965 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1966 %} 1967 1968 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 1969 $$$emit8$secondary; 1970 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg)); 1971 %} 1972 1973 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 1974 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1975 %} 1976 1977 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 1978 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg)); 1979 %} 1980 1981 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 1982 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg)); 1983 %} 1984 1985 enc_class Con32 (immI src) %{ // Con32(storeImmI) 1986 // Output immediate 1987 $$$emit32$src$$constant; 1988 %} 1989 1990 enc_class 
Con32FPR_as_bits(immFPR src) %{ // storeF_imm 1991 // Output Float immediate bits 1992 jfloat jf = $src$$constant; 1993 int jf_as_bits = jint_cast( jf ); 1994 emit_d32(cbuf, jf_as_bits); 1995 %} 1996 1997 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 1998 // Output Float immediate bits 1999 jfloat jf = $src$$constant; 2000 int jf_as_bits = jint_cast( jf ); 2001 emit_d32(cbuf, jf_as_bits); 2002 %} 2003 2004 enc_class Con16 (immI src) %{ // Con16(storeImmI) 2005 // Output immediate 2006 $$$emit16$src$$constant; 2007 %} 2008 2009 enc_class Con_d32(immI src) %{ 2010 emit_d32(cbuf,$src$$constant); 2011 %} 2012 2013 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 2014 // Output immediate memory reference 2015 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 2016 emit_d32(cbuf, 0x00); 2017 %} 2018 2019 enc_class lock_prefix( ) %{ 2020 emit_opcode(cbuf,0xF0); // [Lock] 2021 %} 2022 2023 // Cmp-xchg long value. 2024 // Note: we need to swap rbx, and rcx before and after the 2025 // cmpxchg8 instruction because the instruction uses 2026 // rcx as the high order word of the new value to store but 2027 // our register encoding uses rbx,. 
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG rbx,ecx  (swap back)
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHGB [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB0);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // 16-bit mode
    emit_opcode(cbuf, 0x66);

    // CMPXCHGW [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize ZF!=0 as a 0/1 boolean in 'res'.
  // Uses MOV (which does not touch flags) so the JNE still sees the
  // condition codes set by the preceding compare-exchange.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();    // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg); // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;    // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Long shift by 1..31: SHLD/SHRD the two halves, then shift the other half.
  // $tertiary selects the double-shift direction (0xA4 == SHLD).
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic long shift right by 32..63: move hi to lo, shift, sign-fill hi.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical long shift by 32..63: move one half over, shift it, clear the other.
  // $secondary selects the direction (0x5 == shift right).
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);    // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // Branchless-free MIN: compare, then conditionally skip the move.
  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3; // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();    // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branchless (p < q) ? p+y : p  -- SBB turns the borrow into an all-ones
  // or all-zeros mask that gates the add.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Variable 64-bit left shift; shift count lives in ECX.
  // For counts >= 32, the low half is moved to the high half first.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    // SHL $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Variable 64-bit logical right shift; shift count lives in ECX.
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
  %}

  // Variable 64-bit arithmetic right shift; shift count lives in ECX.
  // For counts >= 32, the high half is sign-filled with SAR 31.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );    // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );    // FLD ST(i-1)
  %}

  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );    // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );    // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );    // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );    // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
2399 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2400 // Opcode already emitted 2401 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2402 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2403 emit_d32(cbuf, $dst$$disp); // Displacement 2404 %} 2405 2406 // Push the integer in stackSlot 'src' onto FP-stack 2407 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2408 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2409 %} 2410 2411 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2412 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2413 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2414 %} 2415 2416 // Same as Pop_Mem_F except for opcode 2417 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2418 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2419 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2420 %} 2421 2422 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2423 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2424 emit_d8( cbuf, 0xD8+$dst$$reg ); 2425 %} 2426 2427 enc_class Push_Reg_FPR( regFPR dst ) %{ 2428 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2429 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2430 %} 2431 2432 // Push FPU's float to a stack-slot, and pop FPU-stack 2433 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2434 int pop = 0x02; 2435 if ($src$$reg != FPR1L_enc) { 2436 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2437 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2438 pop = 0x03; 2439 } 2440 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2441 %} 2442 2443 // Push FPU's double to a stack-slot, and pop FPU-stack 2444 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2445 int pop = 0x02; 2446 if ($src$$reg != FPR1L_enc) { 2447 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2448 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2449 pop = 0x03; 2450 } 2451 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2452 %} 2453 2454 // Push FPU's double to 
a FPU-stack-slot, and pop FPU-stack 2455 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 2456 int pop = 0xD0 - 1; // -1 since we skip FLD 2457 if ($src$$reg != FPR1L_enc) { 2458 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 2459 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2460 pop = 0xD8; 2461 } 2462 emit_opcode( cbuf, 0xDD ); 2463 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) 2464 %} 2465 2466 2467 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 2468 // load dst in FPR0 2469 emit_opcode( cbuf, 0xD9 ); 2470 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2471 if ($src$$reg != FPR1L_enc) { 2472 // fincstp 2473 emit_opcode (cbuf, 0xD9); 2474 emit_opcode (cbuf, 0xF7); 2475 // swap src with FPR1: 2476 // FXCH FPR1 with src 2477 emit_opcode(cbuf, 0xD9); 2478 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2479 // fdecstp 2480 emit_opcode (cbuf, 0xD9); 2481 emit_opcode (cbuf, 0xF6); 2482 } 2483 %} 2484 2485 enc_class Push_ModD_encoding(regD src0, regD src1) %{ 2486 MacroAssembler _masm(&cbuf); 2487 __ subptr(rsp, 8); 2488 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2489 __ fld_d(Address(rsp, 0)); 2490 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2491 __ fld_d(Address(rsp, 0)); 2492 %} 2493 2494 enc_class Push_ModF_encoding(regF src0, regF src1) %{ 2495 MacroAssembler _masm(&cbuf); 2496 __ subptr(rsp, 4); 2497 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2498 __ fld_s(Address(rsp, 0)); 2499 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2500 __ fld_s(Address(rsp, 0)); 2501 %} 2502 2503 enc_class Push_ResultD(regD dst) %{ 2504 MacroAssembler _masm(&cbuf); 2505 __ fstp_d(Address(rsp, 0)); 2506 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2507 __ addptr(rsp, 8); 2508 %} 2509 2510 enc_class Push_ResultF(regF dst, immI d8) %{ 2511 MacroAssembler _masm(&cbuf); 2512 __ fstp_s(Address(rsp, 0)); 2513 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2514 __ addptr(rsp, $d8$$constant); 2515 %} 2516 2517 enc_class Push_SrcD(regD src) %{ 2518 MacroAssembler _masm(&cbuf); 2519 __ subptr(rsp, 8); 
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Allocate an 8-byte scratch slot on the CPU stack.
  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  // Release the 8-byte scratch slot allocated by push_stack_temp_qword.
  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Copy an XMM double into the existing scratch slot and load it onto
  // the x87 stack (becomes FPR1/top-of-stack).
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Rotate 'src' into the FPR1 position (if it is not already there) so a
  // following Pop_Reg_F / Pop_Mem_F can store it.
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Copy the FPU condition codes into EFLAGS and skip the next 5 bytes
  // when the comparison was ordered (parity clear).
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  // Emit the iterative FPREM loop used for floating-point remainder.
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop  (C2 bit set -> reduction incomplete; near jump back -12)
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  // Copy FPU status into EFLAGS, mapping an unordered (NaN) result onto
  // the "less than" case by forcing the carry flag.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf,
                 0x66 );                // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );          // TEST AX,imm16
    emit_d16   ( cbuf, 0x0400 );        // C2 (unordered) bit
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32  ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  // After a P6 FUCOMI-style compare: if the result was unordered (NaN set
  // the parity flag), rewrite EFLAGS so the NaN compares as "less than".
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

  // Pseudo-code for CmpF_Result below:
  // fnstsw_ax();
  // sahf();
  // movl(dst, nan_result);
  // jcc(Assembler::parity, exit);
  // movl(dst, less_result);
  // jcc(Assembler::below, exit);
  // movl(dst, equal_result);
  // jcc(Assembler::equal, exit);
  // movl(dst, greater_result);

  // Result immediates as actually emitted below (Java fcmp semantics):
  // less_result     = -1;
  // greater_result  =  1;
  // equal_result    =  0;
  // nan_result      = -1;
  // NOTE(review): an earlier version of this comment listed less=1 and
  // greater=-1, contradicting the emit_d32 immediates in the encoding.

  // Materialize the three-way float-compare result (-1/0/1, NaN -> -1)
  // into an integer register from the FPU condition codes.
  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);  0x13 = 5+2+5+2+5 bytes remaining
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8    ( cbuf, 0x0C );          // 0x0C = 5+2+5 bytes remaining
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}


  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}

  // Sign-extend a 32-bit int into a long register pair:
  // copy into lo and hi, then arithmetic-shift hi right by 31.
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}

  // Push a long register pair and FILD it as a 64-bit integer, then
  // release the two pushed words.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}

  // EDX:EAX = EAX * src1 (signed, widening), then shift the high half
  // right by (cnt-32) so dst holds bits [cnt, cnt+31] of the product.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5,
             $src1$$reg );
    // SAR EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}

  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}

  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4,
             $src$$reg );
    // ADD    EDX,$tmp    (stale comment said ESI; the modrm uses $tmp)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
  %}

  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp    (stale comment said ESI; the modrm uses $tmp)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}

  // 64-bit divide: push both operands and call SharedRuntime::ldiv,
  // then release the four pushed words.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // 64-bit remainder: same calling sequence as long_div but targets
  // SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Set ZF according to whether the 64-bit value is zero: OR the halves
  // together into $tmp.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
  %}

  // Long equality compare: compare low halves, skip the high compare if
  // they already differ.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
  %}

  // Signed long ordering compare via CMP low halves then SBB of the high
  // halves through $tmp.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
    // SBB    $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
  %}

  // Compare a long against zero: 0 - src via CMP/SBB through $tmp.
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
  %}

  // Sniff, sniff... smells like Gnu Superoptimizer
  // Two's-complement negate of a 64-bit register pair: NEG hi; NEG lo;
  // SBB hi,0 fixes the borrow out of the low half.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm  (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm  (cbuf,0x3, 0x3,  $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm  (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_d8  (cbuf,0 );
  %}

  // POP EDX.
  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);
  %}

  // Jump to the shared rethrow stub.
  enc_class enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // thrown an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);         // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);         // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);         // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);         // FLDCW   std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack.  0x80000000 is the value
    // the hardware stores for out-of-range/NaN inputs, so only then do we
    // fall into the slow-path runtime call.
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Carry on here...
  %}

  // Convert a double (in FPR0) to a long, same strategy as DPR2I_encoding:
  // truncating FISTP, then a slow-path call when the hardware produced the
  // 0x8000000000000000 "invalid" pattern.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);       // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes) for the 64-bit result
    emit_opcode(cbuf,0x83);       // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);       // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);       // FLDCW   std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x5A);       // POP EDX
    emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);     // Size of slow_call
    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Carry on here...
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADDP  ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc =
        $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//      SELF     +--------+
//        |      |  pad2  |  2   pad to align old SP
//        |      +--------+  1
//        |      | locks  |  0
//        |      +--------+----> OptoReg::stack0(), even aligned
//        |      |  pad1  | 11   pad to align new SP
//        |      +--------+
//        |      |        | 10
//        |      | spills |  9   spills
//        V      |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^      |  out   |  7
//        |      |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by    +--------+
//    CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |  new |preserve|      Must be even-aligned.
//        |   SP-+--------+----> Matcher::_new_SP, even aligned
//        |      |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // Note: the RegF threshold here is UseSSE>=1, intentionally different
  // from c_return_value's UseSSE>=2 above.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib
           ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Signed 8-bit integer immediate
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);
  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned 8-bit integer immediate
operand immU8() %{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Signed 16-bit integer immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus-one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: fits in a signed 32-bit value
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

op_cost(5); 3487 format %{ %} 3488 interface(CONST_INTER); 3489 %} 3490 3491 // Double Immediate 3492 operand immDPR() %{ 3493 predicate(UseSSE<=1); 3494 match(ConD); 3495 3496 op_cost(5); 3497 format %{ %} 3498 interface(CONST_INTER); 3499 %} 3500 3501 operand immD() %{ 3502 predicate(UseSSE>=2); 3503 match(ConD); 3504 3505 op_cost(5); 3506 format %{ %} 3507 interface(CONST_INTER); 3508 %} 3509 3510 // Double Immediate zero 3511 operand immD0() %{ 3512 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3513 // bug that generates code such that NaNs compare equal to 0.0 AND do not 3514 // compare equal to -0.0. 3515 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); 3516 match(ConD); 3517 3518 format %{ %} 3519 interface(CONST_INTER); 3520 %} 3521 3522 // Float Immediate zero 3523 operand immFPR0() %{ 3524 predicate(UseSSE == 0 && n->getf() == 0.0F); 3525 match(ConF); 3526 3527 op_cost(5); 3528 format %{ %} 3529 interface(CONST_INTER); 3530 %} 3531 3532 // Float Immediate one 3533 operand immFPR1() %{ 3534 predicate(UseSSE == 0 && n->getf() == 1.0F); 3535 match(ConF); 3536 3537 op_cost(5); 3538 format %{ %} 3539 interface(CONST_INTER); 3540 %} 3541 3542 // Float Immediate 3543 operand immFPR() %{ 3544 predicate( UseSSE == 0 ); 3545 match(ConF); 3546 3547 op_cost(5); 3548 format %{ %} 3549 interface(CONST_INTER); 3550 %} 3551 3552 // Float Immediate 3553 operand immF() %{ 3554 predicate(UseSSE >= 1); 3555 match(ConF); 3556 3557 op_cost(5); 3558 format %{ %} 3559 interface(CONST_INTER); 3560 %} 3561 3562 // Float Immediate zero. 
Zero and not -0.0 3563 operand immF0() %{ 3564 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); 3565 match(ConF); 3566 3567 op_cost(5); 3568 format %{ %} 3569 interface(CONST_INTER); 3570 %} 3571 3572 // Immediates for special shifts (sign extend) 3573 3574 // Constants for increment 3575 operand immI_16() %{ 3576 predicate( n->get_int() == 16 ); 3577 match(ConI); 3578 3579 format %{ %} 3580 interface(CONST_INTER); 3581 %} 3582 3583 operand immI_24() %{ 3584 predicate( n->get_int() == 24 ); 3585 match(ConI); 3586 3587 format %{ %} 3588 interface(CONST_INTER); 3589 %} 3590 3591 // Constant for byte-wide masking 3592 operand immI_255() %{ 3593 predicate( n->get_int() == 255 ); 3594 match(ConI); 3595 3596 format %{ %} 3597 interface(CONST_INTER); 3598 %} 3599 3600 // Constant for short-wide masking 3601 operand immI_65535() %{ 3602 predicate(n->get_int() == 65535); 3603 match(ConI); 3604 3605 format %{ %} 3606 interface(CONST_INTER); 3607 %} 3608 3609 operand kReg() 3610 %{ 3611 constraint(ALLOC_IN_RC(vectmask_reg)); 3612 match(RegVectMask); 3613 format %{%} 3614 interface(REG_INTER); 3615 %} 3616 3617 operand kReg_K1() 3618 %{ 3619 constraint(ALLOC_IN_RC(vectmask_reg_K1)); 3620 match(RegVectMask); 3621 format %{%} 3622 interface(REG_INTER); 3623 %} 3624 3625 operand kReg_K2() 3626 %{ 3627 constraint(ALLOC_IN_RC(vectmask_reg_K2)); 3628 match(RegVectMask); 3629 format %{%} 3630 interface(REG_INTER); 3631 %} 3632 3633 // Special Registers 3634 operand kReg_K3() 3635 %{ 3636 constraint(ALLOC_IN_RC(vectmask_reg_K3)); 3637 match(RegVectMask); 3638 format %{%} 3639 interface(REG_INTER); 3640 %} 3641 3642 operand kReg_K4() 3643 %{ 3644 constraint(ALLOC_IN_RC(vectmask_reg_K4)); 3645 match(RegVectMask); 3646 format %{%} 3647 interface(REG_INTER); 3648 %} 3649 3650 operand kReg_K5() 3651 %{ 3652 constraint(ALLOC_IN_RC(vectmask_reg_K5)); 3653 match(RegVectMask); 3654 format %{%} 3655 interface(REG_INTER); 3656 %} 3657 3658 operand kReg_K6() 3659 %{ 3660 
constraint(ALLOC_IN_RC(vectmask_reg_K6)); 3661 match(RegVectMask); 3662 format %{%} 3663 interface(REG_INTER); 3664 %} 3665 3666 // Special Registers 3667 operand kReg_K7() 3668 %{ 3669 constraint(ALLOC_IN_RC(vectmask_reg_K7)); 3670 match(RegVectMask); 3671 format %{%} 3672 interface(REG_INTER); 3673 %} 3674 3675 // Register Operands 3676 // Integer Register 3677 operand rRegI() %{ 3678 constraint(ALLOC_IN_RC(int_reg)); 3679 match(RegI); 3680 match(xRegI); 3681 match(eAXRegI); 3682 match(eBXRegI); 3683 match(eCXRegI); 3684 match(eDXRegI); 3685 match(eDIRegI); 3686 match(eSIRegI); 3687 3688 format %{ %} 3689 interface(REG_INTER); 3690 %} 3691 3692 // Subset of Integer Register 3693 operand xRegI(rRegI reg) %{ 3694 constraint(ALLOC_IN_RC(int_x_reg)); 3695 match(reg); 3696 match(eAXRegI); 3697 match(eBXRegI); 3698 match(eCXRegI); 3699 match(eDXRegI); 3700 3701 format %{ %} 3702 interface(REG_INTER); 3703 %} 3704 3705 // Special Registers 3706 operand eAXRegI(xRegI reg) %{ 3707 constraint(ALLOC_IN_RC(eax_reg)); 3708 match(reg); 3709 match(rRegI); 3710 3711 format %{ "EAX" %} 3712 interface(REG_INTER); 3713 %} 3714 3715 // Special Registers 3716 operand eBXRegI(xRegI reg) %{ 3717 constraint(ALLOC_IN_RC(ebx_reg)); 3718 match(reg); 3719 match(rRegI); 3720 3721 format %{ "EBX" %} 3722 interface(REG_INTER); 3723 %} 3724 3725 operand eCXRegI(xRegI reg) %{ 3726 constraint(ALLOC_IN_RC(ecx_reg)); 3727 match(reg); 3728 match(rRegI); 3729 3730 format %{ "ECX" %} 3731 interface(REG_INTER); 3732 %} 3733 3734 operand eDXRegI(xRegI reg) %{ 3735 constraint(ALLOC_IN_RC(edx_reg)); 3736 match(reg); 3737 match(rRegI); 3738 3739 format %{ "EDX" %} 3740 interface(REG_INTER); 3741 %} 3742 3743 operand eDIRegI(xRegI reg) %{ 3744 constraint(ALLOC_IN_RC(edi_reg)); 3745 match(reg); 3746 match(rRegI); 3747 3748 format %{ "EDI" %} 3749 interface(REG_INTER); 3750 %} 3751 3752 operand naxRegI() %{ 3753 constraint(ALLOC_IN_RC(nax_reg)); 3754 match(RegI); 3755 match(eCXRegI); 3756 match(eDXRegI); 3757 
match(eSIRegI); 3758 match(eDIRegI); 3759 3760 format %{ %} 3761 interface(REG_INTER); 3762 %} 3763 3764 operand nadxRegI() %{ 3765 constraint(ALLOC_IN_RC(nadx_reg)); 3766 match(RegI); 3767 match(eBXRegI); 3768 match(eCXRegI); 3769 match(eSIRegI); 3770 match(eDIRegI); 3771 3772 format %{ %} 3773 interface(REG_INTER); 3774 %} 3775 3776 operand ncxRegI() %{ 3777 constraint(ALLOC_IN_RC(ncx_reg)); 3778 match(RegI); 3779 match(eAXRegI); 3780 match(eDXRegI); 3781 match(eSIRegI); 3782 match(eDIRegI); 3783 3784 format %{ %} 3785 interface(REG_INTER); 3786 %} 3787 3788 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3789 // // 3790 operand eSIRegI(xRegI reg) %{ 3791 constraint(ALLOC_IN_RC(esi_reg)); 3792 match(reg); 3793 match(rRegI); 3794 3795 format %{ "ESI" %} 3796 interface(REG_INTER); 3797 %} 3798 3799 // Pointer Register 3800 operand anyRegP() %{ 3801 constraint(ALLOC_IN_RC(any_reg)); 3802 match(RegP); 3803 match(eAXRegP); 3804 match(eBXRegP); 3805 match(eCXRegP); 3806 match(eDIRegP); 3807 match(eRegP); 3808 3809 format %{ %} 3810 interface(REG_INTER); 3811 %} 3812 3813 operand eRegP() %{ 3814 constraint(ALLOC_IN_RC(int_reg)); 3815 match(RegP); 3816 match(eAXRegP); 3817 match(eBXRegP); 3818 match(eCXRegP); 3819 match(eDIRegP); 3820 3821 format %{ %} 3822 interface(REG_INTER); 3823 %} 3824 3825 operand rRegP() %{ 3826 constraint(ALLOC_IN_RC(int_reg)); 3827 match(RegP); 3828 match(eAXRegP); 3829 match(eBXRegP); 3830 match(eCXRegP); 3831 match(eDIRegP); 3832 3833 format %{ %} 3834 interface(REG_INTER); 3835 %} 3836 3837 // On windows95, EBP is not safe to use for implicit null tests. 
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX (nax_reg register class)
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX and EBX (nabx_reg register class)
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Long register (a pair of 32-bit registers)
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

operand eBDPRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebpd_reg));
  match(reg);

  format %{ "EBP:EDI" %}
  interface(REG_INTER);
%}
// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand rFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// predicate(false): never chosen by the matcher on its own; used only where
// an instruction names this operand explicitly.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

operand legRegF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand vlRegF() %{
  constraint(ALLOC_IN_RC(float_reg_vl));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand legRegD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

operand vlRegD() %{
  constraint(ALLOC_IN_RC(double_reg_vl));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" with out a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.
// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
4571 pipeline %{ 4572 4573 //----------ATTRIBUTES--------------------------------------------------------- 4574 attributes %{ 4575 variable_size_instructions; // Fixed size instructions 4576 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle 4577 instruction_unit_size = 1; // An instruction is 1 bytes long 4578 instruction_fetch_unit_size = 16; // The processor fetches one line 4579 instruction_fetch_units = 1; // of 16 bytes 4580 4581 // List of nop instructions 4582 nops( MachNop ); 4583 %} 4584 4585 //----------RESOURCES---------------------------------------------------------- 4586 // Resources are the functional units available to the machine 4587 4588 // Generic P2/P3 pipeline 4589 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of 4590 // 3 instructions decoded per cycle. 4591 // 2 load/store ops per cycle, 1 branch, 1 FPU, 4592 // 2 ALU op, only ALU0 handles mul/div instructions. 4593 resources( D0, D1, D2, DECODE = D0 | D1 | D2, 4594 MS0, MS1, MEM = MS0 | MS1, 4595 BR, FPU, 4596 ALU0, ALU1, ALU = ALU0 | ALU1 ); 4597 4598 //----------PIPELINE DESCRIPTION----------------------------------------------- 4599 // Pipeline Description specifies the stages in the machine's pipeline 4600 4601 // Generic P2/P3 pipeline 4602 pipe_desc(S0, S1, S2, S3, S4, S5); 4603 4604 //----------PIPELINE CLASSES--------------------------------------------------- 4605 // Pipeline Classes describe the stages in which input and output are 4606 // referenced by the hardware pipeline. 4607 4608 // Naming convention: ialu or fpu 4609 // Then: _reg 4610 // Then: _reg if there is a 2nd register 4611 // Then: _long if it's a pair of instructions implementing a long 4612 // Then: _fat if it requires the big decoder 4613 // Or: _mem if it requires the big decoder and a memory unit. 
4614 4615 // Integer ALU reg operation 4616 pipe_class ialu_reg(rRegI dst) %{ 4617 single_instruction; 4618 dst : S4(write); 4619 dst : S3(read); 4620 DECODE : S0; // any decoder 4621 ALU : S3; // any alu 4622 %} 4623 4624 // Long ALU reg operation 4625 pipe_class ialu_reg_long(eRegL dst) %{ 4626 instruction_count(2); 4627 dst : S4(write); 4628 dst : S3(read); 4629 DECODE : S0(2); // any 2 decoders 4630 ALU : S3(2); // both alus 4631 %} 4632 4633 // Integer ALU reg operation using big decoder 4634 pipe_class ialu_reg_fat(rRegI dst) %{ 4635 single_instruction; 4636 dst : S4(write); 4637 dst : S3(read); 4638 D0 : S0; // big decoder only 4639 ALU : S3; // any alu 4640 %} 4641 4642 // Long ALU reg operation using big decoder 4643 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4644 instruction_count(2); 4645 dst : S4(write); 4646 dst : S3(read); 4647 D0 : S0(2); // big decoder only; twice 4648 ALU : S3(2); // any 2 alus 4649 %} 4650 4651 // Integer ALU reg-reg operation 4652 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4653 single_instruction; 4654 dst : S4(write); 4655 src : S3(read); 4656 DECODE : S0; // any decoder 4657 ALU : S3; // any alu 4658 %} 4659 4660 // Long ALU reg-reg operation 4661 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4662 instruction_count(2); 4663 dst : S4(write); 4664 src : S3(read); 4665 DECODE : S0(2); // any 2 decoders 4666 ALU : S3(2); // both alus 4667 %} 4668 4669 // Integer ALU reg-reg operation 4670 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4671 single_instruction; 4672 dst : S4(write); 4673 src : S3(read); 4674 D0 : S0; // big decoder only 4675 ALU : S3; // any alu 4676 %} 4677 4678 // Long ALU reg-reg operation 4679 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4680 instruction_count(2); 4681 dst : S4(write); 4682 src : S3(read); 4683 D0 : S0(2); // big decoder only; twice 4684 ALU : S3(2); // both alus 4685 %} 4686 4687 // Integer ALU reg-mem operation 4688 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4689 single_instruction; 4690 dst : S5(write); 4691 mem : S3(read); 4692 D0 : S0; // big decoder only 4693 ALU : S4; // any alu 4694 MEM : S3; // any mem 4695 %} 4696 4697 // Long ALU reg-mem operation 4698 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4699 instruction_count(2); 4700 dst : S5(write); 4701 mem : S3(read); 4702 D0 : S0(2); // big decoder only; twice 4703 ALU : S4(2); // any 2 alus 4704 MEM : S3(2); // both mems 4705 %} 4706 4707 // Integer mem operation (prefetch) 4708 pipe_class ialu_mem(memory mem) 4709 %{ 4710 single_instruction; 4711 mem : S3(read); 4712 D0 : S0; // big decoder only 4713 MEM : S3; // any mem 4714 %} 4715 4716 // Integer Store to Memory 4717 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4718 single_instruction; 4719 mem : S3(read); 4720 src : S5(read); 4721 D0 : S0; // big decoder only 4722 ALU : S4; // any alu 4723 MEM : S3; 4724 %} 4725 4726 // Long Store to Memory 4727 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4728 instruction_count(2); 4729 mem : S3(read); 4730 src : S5(read); 4731 D0 : S0(2); // big decoder only; twice 4732 ALU : S4(2); // any 2 alus 4733 MEM : S3(2); // Both mems 4734 %} 4735 4736 // Integer Store to Memory 4737 pipe_class ialu_mem_imm(memory mem) %{ 4738 single_instruction; 4739 mem : S3(read); 4740 D0 : S0; // big decoder only 4741 ALU : S4; // any alu 4742 MEM : S3; 4743 %} 4744 4745 // Integer ALU0 reg-reg operation 4746 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4747 single_instruction; 4748 dst : S4(write); 4749 src : S3(read); 4750 D0 : S0; // Big decoder only 4751 ALU0 : S3; // only alu0 4752 %} 4753 4754 // Integer ALU0 reg-mem operation 4755 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4756 single_instruction; 4757 dst : S5(write); 4758 mem : S3(read); 4759 D0 : S0; // big decoder only 4760 ALU0 : S4; // ALU0 only 4761 MEM : S3; // any mem 4762 %} 4763 4764 // Integer ALU reg-reg operation 4765 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR
dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for
// the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR     : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr     : S1(read);
    BR     : S3;
%}

// Allocation idiom
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE : S0(3);
    D0     : S2;
    MEM    : S3;
    ALU    : S3(2);
    dst    : S5(write);
    BR     : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0     : S0(2);
    MEM    : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided.  These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.
// The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// (ins_encode deliberately traps: these nodes must never survive to code emission.)
// Load Float
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t!
load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}



// Dummy reg-to-reg vector moves (double). Removed during post-selection cleanup.
// Load Double
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t!
load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

//----------BSWAP-Instruction--------------------------------------------------
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  // Byte-swap each 32-bit half, then exchange halves.
  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  // Swap all 4 bytes, then shift the wanted 16 bits down (zero-extending).
  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  // Swap all 4 bytes, then arithmetic-shift down to sign-extend the short.
  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback for CPUs without LZCNT: BSR + fix-up.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst
(CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
      "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);      // BSR leaves dst undefined for zero input; force -1
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);   // lzcnt = 31 - bsr  (and 32 for zero input)
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  // LZCNT sets CF when the source is zero; use it to fall through to the low word.
  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback for CPUs without LZCNT: BSR on each half + fix-up.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
      "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
      "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
 ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
__ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);      // both words zero: force -1 so the fix-up yields 64
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);   // lzcnt = 63 - bit index
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback for CPUs without TZCNT: BSF + fix-up for zero input.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);    // BSF leaves dst undefined for zero input
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  // TZCNT sets CF when the source is zero; use it to fall through to the high word.
  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback for CPUs without TZCNT: BSF on each half + fix-up.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
      "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);    // both words zero: result is 32 + 32 = 64
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  // Popcount each 32-bit half and add the two counts.
  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL
cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 bits of the mask can matter after a zero-extended byte load.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI
dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV   $dst.hi,$dst.lo\n\t"
            "SAR   $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
%}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR   $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  // Masking with 0xFF reduces the ushort load to a single zero-extended byte load.
  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR   $dst.hi,$dst.hi\n\t"
            "AND   $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 bits of the mask can matter after a zero-extended short load.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  // Masking with 0xFF reduces the int load to a zero-extended byte load.
  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  // Masking with 0xFFFF reduces the int load to a zero-extended short load.
  format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  // Mask has bit 31 clear, so the masked value is non-negative and the high
  // word of the long is simply zero.
  format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
            "MOV    $dst.hi,$mem+4" %}

  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
            "FISTp  $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load via a 64-bit XMM move, bounced through the stack.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD  $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load via XMM, split into the two integer halves.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD   $dst.lo,$tmp\n\t"
            "PSRLQ  $tmp,32\n\t"
            "MOVD   $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe(
ialu_reg_mem ); 5878 %} 5879 5880 // Load Klass Pointer 5881 instruct loadKlass(eRegP dst, memory mem) %{ 5882 match(Set dst (LoadKlass mem)); 5883 5884 ins_cost(125); 5885 format %{ "MOV $dst,$mem" %} 5886 opcode(0x8B); 5887 ins_encode( OpcP, RegMem(dst,mem)); 5888 ins_pipe( ialu_reg_mem ); 5889 %} 5890 5891 // Load Double 5892 instruct loadDPR(regDPR dst, memory mem) %{ 5893 predicate(UseSSE<=1); 5894 match(Set dst (LoadD mem)); 5895 5896 ins_cost(150); 5897 format %{ "FLD_D ST,$mem\n\t" 5898 "FSTP $dst" %} 5899 opcode(0xDD); /* DD /0 */ 5900 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5901 Pop_Reg_DPR(dst) ); 5902 ins_pipe( fpu_reg_mem ); 5903 %} 5904 5905 // Load Double to XMM 5906 instruct loadD(regD dst, memory mem) %{ 5907 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 5908 match(Set dst (LoadD mem)); 5909 ins_cost(145); 5910 format %{ "MOVSD $dst,$mem" %} 5911 ins_encode %{ 5912 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5913 %} 5914 ins_pipe( pipe_slow ); 5915 %} 5916 5917 instruct loadD_partial(regD dst, memory mem) %{ 5918 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 5919 match(Set dst (LoadD mem)); 5920 ins_cost(145); 5921 format %{ "MOVLPD $dst,$mem" %} 5922 ins_encode %{ 5923 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5924 %} 5925 ins_pipe( pipe_slow ); 5926 %} 5927 5928 // Load to XMM register (single-precision floating point) 5929 // MOVSS instruction 5930 instruct loadF(regF dst, memory mem) %{ 5931 predicate(UseSSE>=1); 5932 match(Set dst (LoadF mem)); 5933 ins_cost(145); 5934 format %{ "MOVSS $dst,$mem" %} 5935 ins_encode %{ 5936 __ movflt ($dst$$XMMRegister, $mem$$Address); 5937 %} 5938 ins_pipe( pipe_slow ); 5939 %} 5940 5941 // Load Float 5942 instruct loadFPR(regFPR dst, memory mem) %{ 5943 predicate(UseSSE==0); 5944 match(Set dst (LoadF mem)); 5945 5946 ins_cost(150); 5947 format %{ "FLD_S ST,$mem\n\t" 5948 "FSTP $dst" %} 5949 opcode(0xD9); /* D9 /0 */ 5950 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5951 Pop_Reg_FPR(dst) ); 5952 
ins_pipe( fpu_reg_mem ); 5953 %} 5954 5955 // Load Effective Address 5956 instruct leaP8(eRegP dst, indOffset8 mem) %{ 5957 match(Set dst mem); 5958 5959 ins_cost(110); 5960 format %{ "LEA $dst,$mem" %} 5961 opcode(0x8D); 5962 ins_encode( OpcP, RegMem(dst,mem)); 5963 ins_pipe( ialu_reg_reg_fat ); 5964 %} 5965 5966 instruct leaP32(eRegP dst, indOffset32 mem) %{ 5967 match(Set dst mem); 5968 5969 ins_cost(110); 5970 format %{ "LEA $dst,$mem" %} 5971 opcode(0x8D); 5972 ins_encode( OpcP, RegMem(dst,mem)); 5973 ins_pipe( ialu_reg_reg_fat ); 5974 %} 5975 5976 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{ 5977 match(Set dst mem); 5978 5979 ins_cost(110); 5980 format %{ "LEA $dst,$mem" %} 5981 opcode(0x8D); 5982 ins_encode( OpcP, RegMem(dst,mem)); 5983 ins_pipe( ialu_reg_reg_fat ); 5984 %} 5985 5986 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{ 5987 match(Set dst mem); 5988 5989 ins_cost(110); 5990 format %{ "LEA $dst,$mem" %} 5991 opcode(0x8D); 5992 ins_encode( OpcP, RegMem(dst,mem)); 5993 ins_pipe( ialu_reg_reg_fat ); 5994 %} 5995 5996 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{ 5997 match(Set dst mem); 5998 5999 ins_cost(110); 6000 format %{ "LEA $dst,$mem" %} 6001 opcode(0x8D); 6002 ins_encode( OpcP, RegMem(dst,mem)); 6003 ins_pipe( ialu_reg_reg_fat ); 6004 %} 6005 6006 // Load Constant 6007 instruct loadConI(rRegI dst, immI src) %{ 6008 match(Set dst src); 6009 6010 format %{ "MOV $dst,$src" %} 6011 ins_encode( LdImmI(dst, src) ); 6012 ins_pipe( ialu_reg_fat ); 6013 %} 6014 6015 // Load Constant zero 6016 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{ 6017 match(Set dst src); 6018 effect(KILL cr); 6019 6020 ins_cost(50); 6021 format %{ "XOR $dst,$dst" %} 6022 opcode(0x33); /* + rd */ 6023 ins_encode( OpcP, RegReg( dst, dst ) ); 6024 ins_pipe( ialu_reg ); 6025 %} 6026 6027 instruct loadConP(eRegP dst, immP src) %{ 6028 match(Set dst src); 6029 6030 format %{ "MOV $dst,$src" %} 6031 opcode(0xB8); /* + rd */ 6032 
ins_encode( LdImmP(dst, src) ); 6033 ins_pipe( ialu_reg_fat ); 6034 %} 6035 6036 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{ 6037 match(Set dst src); 6038 effect(KILL cr); 6039 ins_cost(200); 6040 format %{ "MOV $dst.lo,$src.lo\n\t" 6041 "MOV $dst.hi,$src.hi" %} 6042 opcode(0xB8); 6043 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); 6044 ins_pipe( ialu_reg_long_fat ); 6045 %} 6046 6047 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{ 6048 match(Set dst src); 6049 effect(KILL cr); 6050 ins_cost(150); 6051 format %{ "XOR $dst.lo,$dst.lo\n\t" 6052 "XOR $dst.hi,$dst.hi" %} 6053 opcode(0x33,0x33); 6054 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); 6055 ins_pipe( ialu_reg_long ); 6056 %} 6057 6058 // The instruction usage is guarded by predicate in operand immFPR(). 6059 instruct loadConFPR(regFPR dst, immFPR con) %{ 6060 match(Set dst con); 6061 ins_cost(125); 6062 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 6063 "FSTP $dst" %} 6064 ins_encode %{ 6065 __ fld_s($constantaddress($con)); 6066 __ fstp_d($dst$$reg); 6067 %} 6068 ins_pipe(fpu_reg_con); 6069 %} 6070 6071 // The instruction usage is guarded by predicate in operand immFPR0(). 6072 instruct loadConFPR0(regFPR dst, immFPR0 con) %{ 6073 match(Set dst con); 6074 ins_cost(125); 6075 format %{ "FLDZ ST\n\t" 6076 "FSTP $dst" %} 6077 ins_encode %{ 6078 __ fldz(); 6079 __ fstp_d($dst$$reg); 6080 %} 6081 ins_pipe(fpu_reg_con); 6082 %} 6083 6084 // The instruction usage is guarded by predicate in operand immFPR1(). 6085 instruct loadConFPR1(regFPR dst, immFPR1 con) %{ 6086 match(Set dst con); 6087 ins_cost(125); 6088 format %{ "FLD1 ST\n\t" 6089 "FSTP $dst" %} 6090 ins_encode %{ 6091 __ fld1(); 6092 __ fstp_d($dst$$reg); 6093 %} 6094 ins_pipe(fpu_reg_con); 6095 %} 6096 6097 // The instruction usage is guarded by predicate in operand immF(). 
// Load float constant from the constant table into an XMM register.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// (XORPS needs no constant-table entry and is cheaper: cost 100 vs 125)
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
// (x87 path: double constant from the constant table)
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// Load double constant from the constant table into an XMM register.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
// (XORPD avoids a constant-table load, hence the lower cost)
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
// (spill reload: integer)
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Spill reload: long, two 32-bit MOVs (low then high half).
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
// (spill reload: pointer)
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
// (spill reload: float via x87; RMopc_Mem_no_oop because a stack slot
// can never hold an oop needing relocation)
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
// (spill reload: double via x87)
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).

// No SSE means no prefetch instruction available: emit nothing.
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// AllocatePrefetchInstr==3 selects PREFETCHW (3DNow!/later x86).
instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// AllocatePrefetchInstr==0 selects PREFETCHNTA.
instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// AllocatePrefetchInstr==1 selects PREFETCHT0.
instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// AllocatePrefetchInstr==2 selects PREFETCHT2.
instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
// (xRegI restricts src to EAX/EBX/ECX/EDX, the only registers with
// byte-addressable low halves on 32-bit x86)
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88); // MOV r/m8, r8
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// (0x66 operand-size prefix turns the 32-bit MOV into a 16-bit store)
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89); // MOV r/m32, r32
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
// (two 32-bit MOVs — non-atomic, so guarded against atomic-access nodes)
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// (ConvL2I just takes the low half, so only one MOV is needed)
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr ); // the probing CMP clobbers EFLAGS
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B); // CMP r32, r/m32 — the address probe
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// Atomic long store via XMM: stack slot -> XMM -> single 64-bit MOVSD.
// Same address-probe trick as storeL_volatile.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address); // probe for implicit null check
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic long store from a GPR pair: pack lo/hi halves into one XMM
// register with PUNPCKLDQ, then a single 64-bit MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address); // probe for implicit null check
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister); // interleave lo/hi dwords
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
// (guarded by UseStoreImmI16: 16-bit immediate stores stall some CPUs)
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
// (byte store into the card table; same encoding as storeImmB but
// matched against the dedicated StoreCM node)
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double
// (x87 path; regDPR1 requires the value already on top of the FP stack)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
// (the FST itself rounds to 64-bit precision, so an explicit
// RoundDouble node folds into the plain store)
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// Store Float
// (x87 path, only when SSE is completely disabled)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// (FST_S narrows the 64-bit value to 32-bit, so ConvD2F folds into
// the store itself)
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
// (spill: pointer)
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
// (spill: long, two 32-bit MOVs)
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

// Acquire barrier: no instruction needed on x86 (loads are not
// reordered with other loads), so this is an empty encoding.
instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Release barrier: likewise empty on x86 (stores are not reordered
// with other stores).
instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full (StoreLoad) barrier: the only ordering x86 needs an actual
// instruction for; implemented as a locked ADD to the stack.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr); // LOCK ADDL clobbers EFLAGS
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// The barrier can be elided when a preceding locked instruction
// already provides StoreLoad ordering (post_store_load_barrier check).
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// CastX2P: reinterpret an int as a pointer. Both operands are pinned
// to EAX, so no code is emitted at all.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

// CastP2X: reinterpret a pointer as an int — a plain register copy.
instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
// (jmov* forms emulate CMOV with a short branch on pre-P6 CPUs that
// lack the CMOV instruction)
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Branch-emulated CMOV, unsigned compare flavor.
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Real CMOV (P6 and later).
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40); // 0F 4x = CMOVcc
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// UCF (unordered-compare-flags) variant just delegates to the
// unsigned form via an expand rule.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
// (memory-source form: folds the LoadI into the CMOV)
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300); // higher cost so the real CMOV wins when available
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
// predicate(VM_Version::supports_cmov() );
// match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
// ins_cost(250);
// format %{ "CMOV$cop $dst,$src\t# ptr" %}
// opcode(0x0F,0x40);
// ins_encode( enc_cmov(cop), RegMem( dst, src ) );
// ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
// predicate(VM_Version::supports_cmov() );
// match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
// ins_cost(250);
// format %{ "CMOV$cop $dst,$src\t# ptr" %}
// opcode(0x0F,0x40);
// ins_encode( enc_cmov(cop), RegMem( dst, src ) );
// ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
// (x87 FCMOV for doubles; destination must be the FP top-of-stack,
// and FCMOV only supports the unsigned/unordered condition codes)
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
// (x87 FCMOV for floats, SSE fully disabled)
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed compare: FCMOV cannot be used, so fall back to a conditional
// branch around an FP register copy.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
// (XMM registers have no conditional-move instruction, so branch
// around a MOVSS; branch sense is inverted relative to the CMOV)
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// UCF variant delegates to the unsigned form.
instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format
%{ "CMOV$cop $dst.lo,$src.lo\n\t" 7039 "CMOV$cop $dst.hi,$src.hi" %} 7040 opcode(0x0F,0x40); 7041 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 7042 ins_pipe( pipe_cmov_reg_long ); 7043 %} 7044 7045 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{ 7046 predicate(VM_Version::supports_cmov() ); 7047 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7048 ins_cost(200); 7049 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 7050 "CMOV$cop $dst.hi,$src.hi" %} 7051 opcode(0x0F,0x40); 7052 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 7053 ins_pipe( pipe_cmov_reg_long ); 7054 %} 7055 7056 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{ 7057 predicate(VM_Version::supports_cmov() ); 7058 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7059 ins_cost(200); 7060 expand %{ 7061 cmovL_regU(cop, cr, dst, src); 7062 %} 7063 %} 7064 7065 //----------Arithmetic Instructions-------------------------------------------- 7066 //----------Addition Instructions---------------------------------------------- 7067 7068 // Integer Addition Instructions 7069 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7070 match(Set dst (AddI dst src)); 7071 effect(KILL cr); 7072 7073 size(2); 7074 format %{ "ADD $dst,$src" %} 7075 opcode(0x03); 7076 ins_encode( OpcP, RegReg( dst, src) ); 7077 ins_pipe( ialu_reg_reg ); 7078 %} 7079 7080 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7081 match(Set dst (AddI dst src)); 7082 effect(KILL cr); 7083 7084 format %{ "ADD $dst,$src" %} 7085 opcode(0x81, 0x00); /* /0 id */ 7086 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7087 ins_pipe( ialu_reg ); 7088 %} 7089 7090 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{ 7091 predicate(UseIncDec); 7092 match(Set dst (AddI dst src)); 7093 effect(KILL cr); 7094 7095 size(1); 7096 format %{ "INC $dst" %} 7097 opcode(0x40); /* */ 7098 
ins_encode( Opc_plus( primary, dst ) ); 7099 ins_pipe( ialu_reg ); 7100 %} 7101 7102 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ 7103 match(Set dst (AddI src0 src1)); 7104 ins_cost(110); 7105 7106 format %{ "LEA $dst,[$src0 + $src1]" %} 7107 opcode(0x8D); /* 0x8D /r */ 7108 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7109 ins_pipe( ialu_reg_reg ); 7110 %} 7111 7112 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ 7113 match(Set dst (AddP src0 src1)); 7114 ins_cost(110); 7115 7116 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} 7117 opcode(0x8D); /* 0x8D /r */ 7118 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7119 ins_pipe( ialu_reg_reg ); 7120 %} 7121 7122 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ 7123 predicate(UseIncDec); 7124 match(Set dst (AddI dst src)); 7125 effect(KILL cr); 7126 7127 size(1); 7128 format %{ "DEC $dst" %} 7129 opcode(0x48); /* */ 7130 ins_encode( Opc_plus( primary, dst ) ); 7131 ins_pipe( ialu_reg ); 7132 %} 7133 7134 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ 7135 match(Set dst (AddP dst src)); 7136 effect(KILL cr); 7137 7138 size(2); 7139 format %{ "ADD $dst,$src" %} 7140 opcode(0x03); 7141 ins_encode( OpcP, RegReg( dst, src) ); 7142 ins_pipe( ialu_reg_reg ); 7143 %} 7144 7145 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ 7146 match(Set dst (AddP dst src)); 7147 effect(KILL cr); 7148 7149 format %{ "ADD $dst,$src" %} 7150 opcode(0x81,0x00); /* Opcode 81 /0 id */ 7151 // ins_encode( RegImm( dst, src) ); 7152 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7153 ins_pipe( ialu_reg ); 7154 %} 7155 7156 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7157 match(Set dst (AddI dst (LoadI src))); 7158 effect(KILL cr); 7159 7160 ins_cost(150); 7161 format %{ "ADD $dst,$src" %} 7162 opcode(0x03); 7163 ins_encode( OpcP, RegMem( dst, src) ); 7164 ins_pipe( ialu_reg_mem ); 7165 %} 7166 7167 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 
7168 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7169 effect(KILL cr); 7170 7171 ins_cost(150); 7172 format %{ "ADD $dst,$src" %} 7173 opcode(0x01); /* Opcode 01 /r */ 7174 ins_encode( OpcP, RegMem( src, dst ) ); 7175 ins_pipe( ialu_mem_reg ); 7176 %} 7177 7178 // Add Memory with Immediate 7179 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7180 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7181 effect(KILL cr); 7182 7183 ins_cost(125); 7184 format %{ "ADD $dst,$src" %} 7185 opcode(0x81); /* Opcode 81 /0 id */ 7186 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7187 ins_pipe( ialu_mem_imm ); 7188 %} 7189 7190 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{ 7191 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7192 effect(KILL cr); 7193 7194 ins_cost(125); 7195 format %{ "INC $dst" %} 7196 opcode(0xFF); /* Opcode FF /0 */ 7197 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7198 ins_pipe( ialu_mem_imm ); 7199 %} 7200 7201 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7202 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7203 effect(KILL cr); 7204 7205 ins_cost(125); 7206 format %{ "DEC $dst" %} 7207 opcode(0xFF); /* Opcode FF /1 */ 7208 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7209 ins_pipe( ialu_mem_imm ); 7210 %} 7211 7212 7213 instruct checkCastPP( eRegP dst ) %{ 7214 match(Set dst (CheckCastPP dst)); 7215 7216 size(0); 7217 format %{ "#checkcastPP of $dst" %} 7218 ins_encode( /*empty encoding*/ ); 7219 ins_pipe( empty ); 7220 %} 7221 7222 instruct castPP( eRegP dst ) %{ 7223 match(Set dst (CastPP dst)); 7224 format %{ "#castPP of $dst" %} 7225 ins_encode( /*empty encoding*/ ); 7226 ins_pipe( empty ); 7227 %} 7228 7229 instruct castII( rRegI dst ) %{ 7230 match(Set dst (CastII dst)); 7231 format %{ "#castII of $dst" %} 7232 ins_encode( /*empty encoding*/ ); 7233 ins_cost(0); 7234 ins_pipe( empty ); 7235 %} 7236 7237 instruct castLL( eRegL dst ) %{ 7238 match(Set dst (CastLL dst)); 7239 format %{ 
"#castLL of $dst" %} 7240 ins_encode( /*empty encoding*/ ); 7241 ins_cost(0); 7242 ins_pipe( empty ); 7243 %} 7244 7245 instruct castFF( regF dst ) %{ 7246 predicate(UseSSE >= 1); 7247 match(Set dst (CastFF dst)); 7248 format %{ "#castFF of $dst" %} 7249 ins_encode( /*empty encoding*/ ); 7250 ins_cost(0); 7251 ins_pipe( empty ); 7252 %} 7253 7254 instruct castDD( regD dst ) %{ 7255 predicate(UseSSE >= 2); 7256 match(Set dst (CastDD dst)); 7257 format %{ "#castDD of $dst" %} 7258 ins_encode( /*empty encoding*/ ); 7259 ins_cost(0); 7260 ins_pipe( empty ); 7261 %} 7262 7263 instruct castFF_PR( regFPR dst ) %{ 7264 predicate(UseSSE < 1); 7265 match(Set dst (CastFF dst)); 7266 format %{ "#castFF of $dst" %} 7267 ins_encode( /*empty encoding*/ ); 7268 ins_cost(0); 7269 ins_pipe( empty ); 7270 %} 7271 7272 instruct castDD_PR( regDPR dst ) %{ 7273 predicate(UseSSE < 2); 7274 match(Set dst (CastDD dst)); 7275 format %{ "#castDD of $dst" %} 7276 ins_encode( /*empty encoding*/ ); 7277 ins_cost(0); 7278 ins_pipe( empty ); 7279 %} 7280 7281 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7282 7283 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7284 predicate(VM_Version::supports_cx8()); 7285 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7286 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval))); 7287 effect(KILL cr, KILL oldval); 7288 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7289 "MOV $res,0\n\t" 7290 "JNE,s fail\n\t" 7291 "MOV $res,1\n" 7292 "fail:" %} 7293 ins_encode( enc_cmpxchg8(mem_ptr), 7294 enc_flags_ne_to_boolean(res) ); 7295 ins_pipe( pipe_cmpxchg ); 7296 %} 7297 7298 instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7299 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 7300 match(Set res (WeakCompareAndSwapP mem_ptr 
(Binary oldval newval))); 7301 effect(KILL cr, KILL oldval); 7302 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7303 "MOV $res,0\n\t" 7304 "JNE,s fail\n\t" 7305 "MOV $res,1\n" 7306 "fail:" %} 7307 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7308 ins_pipe( pipe_cmpxchg ); 7309 %} 7310 7311 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7312 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval))); 7313 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval))); 7314 effect(KILL cr, KILL oldval); 7315 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7316 "MOV $res,0\n\t" 7317 "JNE,s fail\n\t" 7318 "MOV $res,1\n" 7319 "fail:" %} 7320 ins_encode( enc_cmpxchgb(mem_ptr), 7321 enc_flags_ne_to_boolean(res) ); 7322 ins_pipe( pipe_cmpxchg ); 7323 %} 7324 7325 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7326 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval))); 7327 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval))); 7328 effect(KILL cr, KILL oldval); 7329 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7330 "MOV $res,0\n\t" 7331 "JNE,s fail\n\t" 7332 "MOV $res,1\n" 7333 "fail:" %} 7334 ins_encode( enc_cmpxchgw(mem_ptr), 7335 enc_flags_ne_to_boolean(res) ); 7336 ins_pipe( pipe_cmpxchg ); 7337 %} 7338 7339 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7340 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 7341 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval))); 7342 effect(KILL cr, KILL oldval); 7343 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7344 "MOV $res,0\n\t" 7345 "JNE,s fail\n\t" 7346 "MOV 
$res,1\n" 7347 "fail:" %} 7348 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7349 ins_pipe( pipe_cmpxchg ); 7350 %} 7351 7352 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7353 predicate(VM_Version::supports_cx8()); 7354 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval))); 7355 effect(KILL cr); 7356 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7357 ins_encode( enc_cmpxchg8(mem_ptr) ); 7358 ins_pipe( pipe_cmpxchg ); 7359 %} 7360 7361 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7362 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); 7363 effect(KILL cr); 7364 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7365 ins_encode( enc_cmpxchg(mem_ptr) ); 7366 ins_pipe( pipe_cmpxchg ); 7367 %} 7368 7369 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7370 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval))); 7371 effect(KILL cr); 7372 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7373 ins_encode( enc_cmpxchgb(mem_ptr) ); 7374 ins_pipe( pipe_cmpxchg ); 7375 %} 7376 7377 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7378 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval))); 7379 effect(KILL cr); 7380 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7381 ins_encode( enc_cmpxchgw(mem_ptr) ); 7382 ins_pipe( pipe_cmpxchg ); 7383 %} 7384 7385 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7386 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval))); 7387 effect(KILL cr); 7388 format %{ "CMPXCHG 
[$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7389 ins_encode( enc_cmpxchg(mem_ptr) ); 7390 ins_pipe( pipe_cmpxchg ); 7391 %} 7392 7393 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7394 predicate(n->as_LoadStore()->result_not_used()); 7395 match(Set dummy (GetAndAddB mem add)); 7396 effect(KILL cr); 7397 format %{ "ADDB [$mem],$add" %} 7398 ins_encode %{ 7399 __ lock(); 7400 __ addb($mem$$Address, $add$$constant); 7401 %} 7402 ins_pipe( pipe_cmpxchg ); 7403 %} 7404 7405 // Important to match to xRegI: only 8-bit regs. 7406 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{ 7407 match(Set newval (GetAndAddB mem newval)); 7408 effect(KILL cr); 7409 format %{ "XADDB [$mem],$newval" %} 7410 ins_encode %{ 7411 __ lock(); 7412 __ xaddb($mem$$Address, $newval$$Register); 7413 %} 7414 ins_pipe( pipe_cmpxchg ); 7415 %} 7416 7417 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7418 predicate(n->as_LoadStore()->result_not_used()); 7419 match(Set dummy (GetAndAddS mem add)); 7420 effect(KILL cr); 7421 format %{ "ADDS [$mem],$add" %} 7422 ins_encode %{ 7423 __ lock(); 7424 __ addw($mem$$Address, $add$$constant); 7425 %} 7426 ins_pipe( pipe_cmpxchg ); 7427 %} 7428 7429 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{ 7430 match(Set newval (GetAndAddS mem newval)); 7431 effect(KILL cr); 7432 format %{ "XADDS [$mem],$newval" %} 7433 ins_encode %{ 7434 __ lock(); 7435 __ xaddw($mem$$Address, $newval$$Register); 7436 %} 7437 ins_pipe( pipe_cmpxchg ); 7438 %} 7439 7440 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7441 predicate(n->as_LoadStore()->result_not_used()); 7442 match(Set dummy (GetAndAddI mem add)); 7443 effect(KILL cr); 7444 format %{ "ADDL [$mem],$add" %} 7445 ins_encode %{ 7446 __ lock(); 7447 __ addl($mem$$Address, $add$$constant); 7448 %} 7449 ins_pipe( pipe_cmpxchg ); 7450 %} 7451 7452 instruct xaddI( memory mem, rRegI 
newval, eFlagsReg cr) %{ 7453 match(Set newval (GetAndAddI mem newval)); 7454 effect(KILL cr); 7455 format %{ "XADDL [$mem],$newval" %} 7456 ins_encode %{ 7457 __ lock(); 7458 __ xaddl($mem$$Address, $newval$$Register); 7459 %} 7460 ins_pipe( pipe_cmpxchg ); 7461 %} 7462 7463 // Important to match to xRegI: only 8-bit regs. 7464 instruct xchgB( memory mem, xRegI newval) %{ 7465 match(Set newval (GetAndSetB mem newval)); 7466 format %{ "XCHGB $newval,[$mem]" %} 7467 ins_encode %{ 7468 __ xchgb($newval$$Register, $mem$$Address); 7469 %} 7470 ins_pipe( pipe_cmpxchg ); 7471 %} 7472 7473 instruct xchgS( memory mem, rRegI newval) %{ 7474 match(Set newval (GetAndSetS mem newval)); 7475 format %{ "XCHGW $newval,[$mem]" %} 7476 ins_encode %{ 7477 __ xchgw($newval$$Register, $mem$$Address); 7478 %} 7479 ins_pipe( pipe_cmpxchg ); 7480 %} 7481 7482 instruct xchgI( memory mem, rRegI newval) %{ 7483 match(Set newval (GetAndSetI mem newval)); 7484 format %{ "XCHGL $newval,[$mem]" %} 7485 ins_encode %{ 7486 __ xchgl($newval$$Register, $mem$$Address); 7487 %} 7488 ins_pipe( pipe_cmpxchg ); 7489 %} 7490 7491 instruct xchgP( memory mem, pRegP newval) %{ 7492 match(Set newval (GetAndSetP mem newval)); 7493 format %{ "XCHGL $newval,[$mem]" %} 7494 ins_encode %{ 7495 __ xchgl($newval$$Register, $mem$$Address); 7496 %} 7497 ins_pipe( pipe_cmpxchg ); 7498 %} 7499 7500 //----------Subtraction Instructions------------------------------------------- 7501 7502 // Integer Subtraction Instructions 7503 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7504 match(Set dst (SubI dst src)); 7505 effect(KILL cr); 7506 7507 size(2); 7508 format %{ "SUB $dst,$src" %} 7509 opcode(0x2B); 7510 ins_encode( OpcP, RegReg( dst, src) ); 7511 ins_pipe( ialu_reg_reg ); 7512 %} 7513 7514 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7515 match(Set dst (SubI dst src)); 7516 effect(KILL cr); 7517 7518 format %{ "SUB $dst,$src" %} 7519 opcode(0x81,0x05); /* Opcode 81 /5 */ 7520 // ins_encode( 
RegImm( dst, src) ); 7521 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7522 ins_pipe( ialu_reg ); 7523 %} 7524 7525 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7526 match(Set dst (SubI dst (LoadI src))); 7527 effect(KILL cr); 7528 7529 ins_cost(150); 7530 format %{ "SUB $dst,$src" %} 7531 opcode(0x2B); 7532 ins_encode( OpcP, RegMem( dst, src) ); 7533 ins_pipe( ialu_reg_mem ); 7534 %} 7535 7536 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7537 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 7538 effect(KILL cr); 7539 7540 ins_cost(150); 7541 format %{ "SUB $dst,$src" %} 7542 opcode(0x29); /* Opcode 29 /r */ 7543 ins_encode( OpcP, RegMem( src, dst ) ); 7544 ins_pipe( ialu_mem_reg ); 7545 %} 7546 7547 // Subtract from a pointer 7548 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{ 7549 match(Set dst (AddP dst (SubI zero src))); 7550 effect(KILL cr); 7551 7552 size(2); 7553 format %{ "SUB $dst,$src" %} 7554 opcode(0x2B); 7555 ins_encode( OpcP, RegReg( dst, src) ); 7556 ins_pipe( ialu_reg_reg ); 7557 %} 7558 7559 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{ 7560 match(Set dst (SubI zero dst)); 7561 effect(KILL cr); 7562 7563 size(2); 7564 format %{ "NEG $dst" %} 7565 opcode(0xF7,0x03); // Opcode F7 /3 7566 ins_encode( OpcP, RegOpc( dst ) ); 7567 ins_pipe( ialu_reg ); 7568 %} 7569 7570 //----------Multiplication/Division Instructions------------------------------- 7571 // Integer Multiplication Instructions 7572 // Multiply Register 7573 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7574 match(Set dst (MulI dst src)); 7575 effect(KILL cr); 7576 7577 size(3); 7578 ins_cost(300); 7579 format %{ "IMUL $dst,$src" %} 7580 opcode(0xAF, 0x0F); 7581 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 7582 ins_pipe( ialu_reg_reg_alu0 ); 7583 %} 7584 7585 // Multiply 32-bit Immediate 7586 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ 7587 match(Set dst (MulI src 
imm)); 7588 effect(KILL cr); 7589 7590 ins_cost(300); 7591 format %{ "IMUL $dst,$src,$imm" %} 7592 opcode(0x69); /* 69 /r id */ 7593 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 7594 ins_pipe( ialu_reg_reg_alu0 ); 7595 %} 7596 7597 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 7598 match(Set dst src); 7599 effect(KILL cr); 7600 7601 // Note that this is artificially increased to make it more expensive than loadConL 7602 ins_cost(250); 7603 format %{ "MOV EAX,$src\t// low word only" %} 7604 opcode(0xB8); 7605 ins_encode( LdImmL_Lo(dst, src) ); 7606 ins_pipe( ialu_reg_fat ); 7607 %} 7608 7609 // Multiply by 32-bit Immediate, taking the shifted high order results 7610 // (special case for shift by 32) 7611 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 7612 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7613 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7614 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7615 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7616 effect(USE src1, KILL cr); 7617 7618 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7619 ins_cost(0*100 + 1*400 - 150); 7620 format %{ "IMUL EDX:EAX,$src1" %} 7621 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7622 ins_pipe( pipe_slow ); 7623 %} 7624 7625 // Multiply by 32-bit Immediate, taking the shifted high order results 7626 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 7627 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7628 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7629 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7630 
_kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7631 effect(USE src1, KILL cr); 7632 7633 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7634 ins_cost(1*100 + 1*400 - 150); 7635 format %{ "IMUL EDX:EAX,$src1\n\t" 7636 "SAR EDX,$cnt-32" %} 7637 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7638 ins_pipe( pipe_slow ); 7639 %} 7640 7641 // Multiply Memory 32-bit Immediate 7642 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ 7643 match(Set dst (MulI (LoadI src) imm)); 7644 effect(KILL cr); 7645 7646 ins_cost(300); 7647 format %{ "IMUL $dst,$src,$imm" %} 7648 opcode(0x69); /* 69 /r id */ 7649 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); 7650 ins_pipe( ialu_reg_mem_alu0 ); 7651 %} 7652 7653 // Multiply Memory 7654 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ 7655 match(Set dst (MulI dst (LoadI src))); 7656 effect(KILL cr); 7657 7658 ins_cost(350); 7659 format %{ "IMUL $dst,$src" %} 7660 opcode(0xAF, 0x0F); 7661 ins_encode( OpcS, OpcP, RegMem( dst, src) ); 7662 ins_pipe( ialu_reg_mem_alu0 ); 7663 %} 7664 7665 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr) 7666 %{ 7667 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3))); 7668 effect(KILL cr, KILL src2); 7669 7670 expand %{ mulI_eReg(dst, src1, cr); 7671 mulI_eReg(src2, src3, cr); 7672 addI_eReg(dst, src2, cr); %} 7673 %} 7674 7675 // Multiply Register Int to Long 7676 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 7677 // Basic Idea: long = (long)int * (long)int 7678 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 7679 effect(DEF dst, USE src, USE src1, KILL flags); 7680 7681 ins_cost(300); 7682 format %{ "IMUL $dst,$src1" %} 7683 7684 ins_encode( long_int_multiply( dst, src1 ) ); 7685 ins_pipe( ialu_reg_reg_alu0 ); 7686 %} 7687 7688 instruct mulIS_eReg(eADXRegL dst, 
immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 7689 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 7690 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 7691 effect(KILL flags); 7692 7693 ins_cost(300); 7694 format %{ "MUL $dst,$src1" %} 7695 7696 ins_encode( long_uint_multiply(dst, src1) ); 7697 ins_pipe( ialu_reg_reg_alu0 ); 7698 %} 7699 7700 // Multiply Register Long 7701 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7702 match(Set dst (MulL dst src)); 7703 effect(KILL cr, TEMP tmp); 7704 ins_cost(4*100+3*400); 7705 // Basic idea: lo(result) = lo(x_lo * y_lo) 7706 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 7707 format %{ "MOV $tmp,$src.lo\n\t" 7708 "IMUL $tmp,EDX\n\t" 7709 "MOV EDX,$src.hi\n\t" 7710 "IMUL EDX,EAX\n\t" 7711 "ADD $tmp,EDX\n\t" 7712 "MUL EDX:EAX,$src.lo\n\t" 7713 "ADD EDX,$tmp" %} 7714 ins_encode( long_multiply( dst, src, tmp ) ); 7715 ins_pipe( pipe_slow ); 7716 %} 7717 7718 // Multiply Register Long where the left operand's high 32 bits are zero 7719 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7720 predicate(is_operand_hi32_zero(n->in(1))); 7721 match(Set dst (MulL dst src)); 7722 effect(KILL cr, TEMP tmp); 7723 ins_cost(2*100+2*400); 7724 // Basic idea: lo(result) = lo(x_lo * y_lo) 7725 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 7726 format %{ "MOV $tmp,$src.hi\n\t" 7727 "IMUL $tmp,EAX\n\t" 7728 "MUL EDX:EAX,$src.lo\n\t" 7729 "ADD EDX,$tmp" %} 7730 ins_encode %{ 7731 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 7732 __ imull($tmp$$Register, rax); 7733 __ mull($src$$Register); 7734 __ addl(rdx, $tmp$$Register); 7735 %} 7736 ins_pipe( pipe_slow ); 7737 %} 7738 7739 // Multiply Register Long where the right operand's high 32 bits are zero 7740 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7741 
predicate(is_operand_hi32_zero(n->in(2))); 7742 match(Set dst (MulL dst src)); 7743 effect(KILL cr, TEMP tmp); 7744 ins_cost(2*100+2*400); 7745 // Basic idea: lo(result) = lo(x_lo * y_lo) 7746 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 7747 format %{ "MOV $tmp,$src.lo\n\t" 7748 "IMUL $tmp,EDX\n\t" 7749 "MUL EDX:EAX,$src.lo\n\t" 7750 "ADD EDX,$tmp" %} 7751 ins_encode %{ 7752 __ movl($tmp$$Register, $src$$Register); 7753 __ imull($tmp$$Register, rdx); 7754 __ mull($src$$Register); 7755 __ addl(rdx, $tmp$$Register); 7756 %} 7757 ins_pipe( pipe_slow ); 7758 %} 7759 7760 // Multiply Register Long where the left and the right operands' high 32 bits are zero 7761 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ 7762 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); 7763 match(Set dst (MulL dst src)); 7764 effect(KILL cr); 7765 ins_cost(1*400); 7766 // Basic idea: lo(result) = lo(x_lo * y_lo) 7767 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 7768 format %{ "MUL EDX:EAX,$src.lo\n\t" %} 7769 ins_encode %{ 7770 __ mull($src$$Register); 7771 %} 7772 ins_pipe( pipe_slow ); 7773 %} 7774 7775 // Multiply Register Long by small constant 7776 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ 7777 match(Set dst (MulL dst src)); 7778 effect(KILL cr, TEMP tmp); 7779 ins_cost(2*100+2*400); 7780 size(12); 7781 // Basic idea: lo(result) = lo(src * EAX) 7782 // hi(result) = hi(src * EAX) + lo(src * EDX) 7783 format %{ "IMUL $tmp,EDX,$src\n\t" 7784 "MOV EDX,$src\n\t" 7785 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 7786 "ADD EDX,$tmp" %} 7787 ins_encode( long_multiply_con( dst, src, tmp ) ); 7788 ins_pipe( pipe_slow ); 7789 %} 7790 7791 // Integer DIV with Register 7792 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7793 match(Set rax (DivI rax div)); 7794 effect(KILL rdx, KILL cr); 
7795 size(26); 7796 ins_cost(30*100+10*100); 7797 format %{ "CMP EAX,0x80000000\n\t" 7798 "JNE,s normal\n\t" 7799 "XOR EDX,EDX\n\t" 7800 "CMP ECX,-1\n\t" 7801 "JE,s done\n" 7802 "normal: CDQ\n\t" 7803 "IDIV $div\n\t" 7804 "done:" %} 7805 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7806 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7807 ins_pipe( ialu_reg_reg_alu0 ); 7808 %} 7809 7810 // Divide Register Long 7811 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ 7812 match(Set dst (DivL src1 src2)); 7813 effect(CALL); 7814 ins_cost(10000); 7815 format %{ "PUSH $src1.hi\n\t" 7816 "PUSH $src1.lo\n\t" 7817 "PUSH $src2.hi\n\t" 7818 "PUSH $src2.lo\n\t" 7819 "CALL SharedRuntime::ldiv\n\t" 7820 "ADD ESP,16" %} 7821 ins_encode( long_div(src1,src2) ); 7822 ins_pipe( pipe_slow ); 7823 %} 7824 7825 // Integer DIVMOD with Register, both quotient and mod results 7826 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7827 match(DivModI rax div); 7828 effect(KILL cr); 7829 size(26); 7830 ins_cost(30*100+10*100); 7831 format %{ "CMP EAX,0x80000000\n\t" 7832 "JNE,s normal\n\t" 7833 "XOR EDX,EDX\n\t" 7834 "CMP ECX,-1\n\t" 7835 "JE,s done\n" 7836 "normal: CDQ\n\t" 7837 "IDIV $div\n\t" 7838 "done:" %} 7839 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7840 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7841 ins_pipe( pipe_slow ); 7842 %} 7843 7844 // Integer MOD with Register 7845 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ 7846 match(Set rdx (ModI rax div)); 7847 effect(KILL rax, KILL cr); 7848 7849 size(26); 7850 ins_cost(300); 7851 format %{ "CDQ\n\t" 7852 "IDIV $div" %} 7853 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7854 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7855 ins_pipe( ialu_reg_reg_alu0 ); 7856 %} 7857 7858 // Remainder Register Long 7859 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ 7860 match(Set dst (ModL src1 src2)); 7861 effect(CALL); 7862 ins_cost(10000); 7863 format %{ "PUSH $src1.hi\n\t" 7864 "PUSH 
$src1.lo\n\t" 7865 "PUSH $src2.hi\n\t" 7866 "PUSH $src2.lo\n\t" 7867 "CALL SharedRuntime::lrem\n\t" 7868 "ADD ESP,16" %} 7869 ins_encode( long_mod(src1,src2) ); 7870 ins_pipe( pipe_slow ); 7871 %} 7872 7873 // Divide Register Long (no special case since divisor != -1) 7874 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7875 match(Set dst (DivL dst imm)); 7876 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7877 ins_cost(1000); 7878 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" 7879 "XOR $tmp2,$tmp2\n\t" 7880 "CMP $tmp,EDX\n\t" 7881 "JA,s fast\n\t" 7882 "MOV $tmp2,EAX\n\t" 7883 "MOV EAX,EDX\n\t" 7884 "MOV EDX,0\n\t" 7885 "JLE,s pos\n\t" 7886 "LNEG EAX : $tmp2\n\t" 7887 "DIV $tmp # unsigned division\n\t" 7888 "XCHG EAX,$tmp2\n\t" 7889 "DIV $tmp\n\t" 7890 "LNEG $tmp2 : EAX\n\t" 7891 "JMP,s done\n" 7892 "pos:\n\t" 7893 "DIV $tmp\n\t" 7894 "XCHG EAX,$tmp2\n" 7895 "fast:\n\t" 7896 "DIV $tmp\n" 7897 "done:\n\t" 7898 "MOV EDX,$tmp2\n\t" 7899 "NEG EDX:EAX # if $imm < 0" %} 7900 ins_encode %{ 7901 int con = (int)$imm$$constant; 7902 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7903 int pcon = (con > 0) ? con : -con; 7904 Label Lfast, Lpos, Ldone; 7905 7906 __ movl($tmp$$Register, pcon); 7907 __ xorl($tmp2$$Register,$tmp2$$Register); 7908 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7909 __ jccb(Assembler::above, Lfast); // result fits into 32 bit 7910 7911 __ movl($tmp2$$Register, $dst$$Register); // save 7912 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7913 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7914 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7915 7916 // Negative dividend.
// The dividend is made positive with lneg so two unsigned 32-bit DIVs can
// stand in for a 64-bit signed divide; the sign is restored with lneg below
// (and again at the end when the divisor constant was negative, con < 0).
 7917 // convert value to positive to use unsigned division 7918 __ lneg($dst$$Register, $tmp2$$Register); 7919 __ divl($tmp$$Register); 7920 __ xchgl($dst$$Register, $tmp2$$Register); 7921 __ divl($tmp$$Register); 7922 // revert result back to negative 7923 __ lneg($tmp2$$Register, $dst$$Register); 7924 __ jmpb(Ldone); 7925 7926 __ bind(Lpos); 7927 __ divl($tmp$$Register); // Use unsigned division 7928 __ xchgl($dst$$Register, $tmp2$$Register); 7929 // Fallthrow for final divide, tmp2 has 32 bit hi result 7930 7931 __ bind(Lfast); 7932 // fast path: src is positive 7933 __ divl($tmp$$Register); // Use unsigned division 7934 7935 __ bind(Ldone); 7936 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7937 if (con < 0) { 7938 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7939 } 7940 %} 7941 ins_pipe( pipe_slow ); 7942 %} 7943 7944 // Remainder Register Long (remainder fit into 32 bits) 7945 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7946 match(Set dst (ModL dst imm)); 7947 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7948 ins_cost(1000); 7949 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7950 "CMP $tmp,EDX\n\t" 7951 "JA,s fast\n\t" 7952 "MOV $tmp2,EAX\n\t" 7953 "MOV EAX,EDX\n\t" 7954 "MOV EDX,0\n\t" 7955 "JLE,s pos\n\t" 7956 "LNEG EAX : $tmp2\n\t" 7957 "DIV $tmp # unsigned division\n\t" 7958 "MOV EAX,$tmp2\n\t" 7959 "DIV $tmp\n\t" 7960 "NEG EDX\n\t" 7961 "JMP,s done\n" 7962 "pos:\n\t" 7963 "DIV $tmp\n\t" 7964 "MOV EAX,$tmp2\n" 7965 "fast:\n\t" 7966 "DIV $tmp\n" 7967 "done:\n\t" 7968 "MOV EAX,EDX\n\t" 7969 "SAR EDX,31\n\t" %} 7970 ins_encode %{ 7971 int con = (int)$imm$$constant; 7972 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7973 int pcon = (con > 0) ?
// pcon = |con|: the divide below is done unsigned on the magnitude; the
// remainder's sign (which follows the dividend) is restored afterwards
// via the negl on the high word.
 con : -con; 7974 Label Lfast, Lpos, Ldone; 7975 7976 __ movl($tmp$$Register, pcon); 7977 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7978 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7979 7980 __ movl($tmp2$$Register, $dst$$Register); // save 7981 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7982 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7983 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7984 7985 // Negative dividend. 7986 // convert value to positive to use unsigned division 7987 __ lneg($dst$$Register, $tmp2$$Register); 7988 __ divl($tmp$$Register); 7989 __ movl($dst$$Register, $tmp2$$Register); 7990 __ divl($tmp$$Register); 7991 // revert remainder back to negative 7992 __ negl(HIGH_FROM_LOW($dst$$Register)); 7993 __ jmpb(Ldone); 7994 7995 __ bind(Lpos); 7996 __ divl($tmp$$Register); 7997 __ movl($dst$$Register, $tmp2$$Register); 7998 7999 __ bind(Lfast); 8000 // fast path: src is positive 8001 __ divl($tmp$$Register); 8002 8003 __ bind(Ldone); 8004 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 8005 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 8006 8007 %} 8008 ins_pipe( pipe_slow ); 8009 %} 8010 8011 // Integer Shift Instructions 8012 // Shift Left by one 8013 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8014 match(Set dst (LShiftI dst shift)); 8015 effect(KILL cr); 8016 8017 size(2); 8018 format %{ "SHL $dst,$shift" %} 8019 opcode(0xD1, 0x4); /* D1 /4 */ 8020 ins_encode( OpcP, RegOpc( dst ) ); 8021 ins_pipe( ialu_reg ); 8022 %} 8023 8024 // Shift Left by 8-bit immediate 8025 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8026 match(Set dst (LShiftI dst shift)); 8027 effect(KILL cr); 8028 8029 size(3); 8030 format %{ "SHL $dst,$shift" %} 8031 opcode(0xC1, 0x4); /* C1 /4 ib */ 8032 ins_encode( RegOpcImm( dst, shift) ); 8033 ins_pipe( ialu_reg ); 8034 %} 8035 8036 // Shift Left by variable 8037 instruct
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8038 match(Set dst (LShiftI dst shift)); 8039 effect(KILL cr); 8040 8041 size(2); 8042 format %{ "SHL $dst,$shift" %} 8043 opcode(0xD3, 0x4); /* D3 /4 */ 8044 ins_encode( OpcP, RegOpc( dst ) ); 8045 ins_pipe( ialu_reg_reg ); 8046 %} 8047 8048 // Arithmetic shift right by one 8049 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8050 match(Set dst (RShiftI dst shift)); 8051 effect(KILL cr); 8052 8053 size(2); 8054 format %{ "SAR $dst,$shift" %} 8055 opcode(0xD1, 0x7); /* D1 /7 */ 8056 ins_encode( OpcP, RegOpc( dst ) ); 8057 ins_pipe( ialu_reg ); 8058 %} 8059 8060 // Arithmetic shift right by one 8061 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{ 8062 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 8063 effect(KILL cr); 8064 format %{ "SAR $dst,$shift" %} 8065 opcode(0xD1, 0x7); /* D1 /7 */ 8066 ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 8067 ins_pipe( ialu_mem_imm ); 8068 %} 8069 8070 // Arithmetic Shift Right by 8-bit immediate 8071 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8072 match(Set dst (RShiftI dst shift)); 8073 effect(KILL cr); 8074 8075 size(3); 8076 format %{ "SAR $dst,$shift" %} 8077 opcode(0xC1, 0x7); /* C1 /7 ib */ 8078 ins_encode( RegOpcImm( dst, shift ) ); 8079 ins_pipe( ialu_mem_imm ); 8080 %} 8081 8082 // Arithmetic Shift Right by 8-bit immediate 8083 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 8084 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 8085 effect(KILL cr); 8086 8087 format %{ "SAR $dst,$shift" %} 8088 opcode(0xC1, 0x7); /* C1 /7 ib */ 8089 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 8090 ins_pipe( ialu_mem_imm ); 8091 %} 8092 8093 // Arithmetic Shift Right by variable 8094 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8095 match(Set dst (RShiftI dst shift)); 8096 effect(KILL cr); 8097 8098 size(2); 8099 format %{ "SAR $dst,$shift" %} 8100
// D3 /7 is SAR r/m32,CL -- arithmetic right shift by the count in CL
// (hence the eCXRegI operand constraint on 'shift').
 opcode(0xD3, 0x7); /* D3 /7 */ 8101 ins_encode( OpcP, RegOpc( dst ) ); 8102 ins_pipe( ialu_reg_reg ); 8103 %} 8104 8105 // Logical shift right by one 8106 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8107 match(Set dst (URShiftI dst shift)); 8108 effect(KILL cr); 8109 8110 size(2); 8111 format %{ "SHR $dst,$shift" %} 8112 opcode(0xD1, 0x5); /* D1 /5 */ 8113 ins_encode( OpcP, RegOpc( dst ) ); 8114 ins_pipe( ialu_reg ); 8115 %} 8116 8117 // Logical Shift Right by 8-bit immediate 8118 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8119 match(Set dst (URShiftI dst shift)); 8120 effect(KILL cr); 8121 8122 size(3); 8123 format %{ "SHR $dst,$shift" %} 8124 opcode(0xC1, 0x5); /* C1 /5 ib */ 8125 ins_encode( RegOpcImm( dst, shift) ); 8126 ins_pipe( ialu_reg ); 8127 %} 8128 8129 8130 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. 8131 // This idiom is used by the compiler for the i2b bytecode. 8132 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ 8133 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); 8134 8135 size(3); 8136 format %{ "MOVSX $dst,$src :8" %} 8137 ins_encode %{ 8138 __ movsbl($dst$$Register, $src$$Register); 8139 %} 8140 ins_pipe(ialu_reg_reg); 8141 %} 8142 8143 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. 8144 // This idiom is used by the compiler for the i2s bytecode.
// i2s: int -> short narrowing via MOVSX (sign-extend the low 16 bits);
// matches the (x << 16) >> 16 idiom produced for the i2s bytecode.
 8145 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{ 8146 match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); 8147 8148 size(3); 8149 format %{ "MOVSX $dst,$src :16" %} 8150 ins_encode %{ 8151 __ movswl($dst$$Register, $src$$Register); 8152 %} 8153 ins_pipe(ialu_reg_reg); 8154 %} 8155 8156 8157 // Logical Shift Right by variable 8158 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8159 match(Set dst (URShiftI dst shift)); 8160 effect(KILL cr); 8161 8162 size(2); 8163 format %{ "SHR $dst,$shift" %} 8164 opcode(0xD3, 0x5); /* D3 /5 */ 8165 ins_encode( OpcP, RegOpc( dst ) ); 8166 ins_pipe( ialu_reg_reg ); 8167 %} 8168 8169 8170 //----------Logical Instructions----------------------------------------------- 8171 //----------Integer Logical Instructions--------------------------------------- 8172 // And Instructions 8173 // And Register with Register 8174 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8175 match(Set dst (AndI dst src)); 8176 effect(KILL cr); 8177 8178 size(2); 8179 format %{ "AND $dst,$src" %} 8180 opcode(0x23); 8181 ins_encode( OpcP, RegReg( dst, src) ); 8182 ins_pipe( ialu_reg_reg ); 8183 %} 8184 8185 // And Register with Immediate 8186 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8187 match(Set dst (AndI dst src)); 8188 effect(KILL cr); 8189 8190 format %{ "AND $dst,$src" %} 8191 opcode(0x81,0x04); /* Opcode 81 /4 */ 8192 // ins_encode( RegImm( dst, src) ); 8193 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8194 ins_pipe( ialu_reg ); 8195 %} 8196 8197 // And Register with Memory 8198 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8199 match(Set dst (AndI dst (LoadI src))); 8200 effect(KILL cr); 8201 8202 ins_cost(150); 8203 format %{ "AND $dst,$src" %} 8204 opcode(0x23); 8205 ins_encode( OpcP, RegMem( dst, src) ); 8206 ins_pipe( ialu_reg_mem ); 8207 %} 8208 8209 // And Memory with Register 8210 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8211 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8212 effect(KILL cr); 8213 8214 ins_cost(150); 8215 format %{ "AND $dst,$src" %} 8216 opcode(0x21); /* Opcode 21 /r */ 8217 ins_encode( OpcP, RegMem( src, dst ) ); 8218 ins_pipe( ialu_mem_reg ); 8219 %} 8220 8221 // And Memory with Immediate 8222 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8223 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8224 effect(KILL cr); 8225 8226 ins_cost(125); 8227 format %{ "AND $dst,$src" %} 8228 opcode(0x81, 0x4); /* Opcode 81 /4 id */ 8229 // ins_encode( MemImm( dst, src) ); 8230 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8231 ins_pipe( ialu_mem_imm ); 8232 %} 8233 8234 // BMI1 instructions 8235 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ 8236 match(Set dst (AndI (XorI src1 minus_1) src2)); 8237 predicate(UseBMI1Instructions); 8238 effect(KILL cr); 8239 8240 format %{ "ANDNL $dst, $src1, $src2" %} 8241 8242 ins_encode %{ 8243 __ andnl($dst$$Register, $src1$$Register, $src2$$Register); 8244 %} 8245 ins_pipe(ialu_reg); 8246 %} 8247 8248 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ 8249 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) )); 8250 predicate(UseBMI1Instructions); 8251 effect(KILL cr); 8252 8253 ins_cost(125); 8254 format %{ "ANDNL $dst, $src1, $src2" %} 8255 8256 ins_encode %{ 8257 __ andnl($dst$$Register, $src1$$Register, $src2$$Address); 8258 %} 8259 ins_pipe(ialu_reg_mem); 8260 %} 8261 8262 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{ 8263 match(Set dst (AndI (SubI imm_zero src) src)); 8264 predicate(UseBMI1Instructions); 8265 effect(KILL cr); 8266 8267 format %{ "BLSIL $dst, $src" %} 8268 8269 ins_encode %{ 8270 __ blsil($dst$$Register, $src$$Register); 8271 %} 8272 ins_pipe(ialu_reg); 8273 %} 8274 8275 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{ 8276
// BLSIL dst,src computes src & (-src): it isolates the lowest set bit,
// which is exactly the (AndI (SubI 0 src) src) match tree below (BMI1).
 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); 8277 predicate(UseBMI1Instructions); 8278 effect(KILL cr); 8279 8280 ins_cost(125); 8281 format %{ "BLSIL $dst, $src" %} 8282 8283 ins_encode %{ 8284 __ blsil($dst$$Register, $src$$Address); 8285 %} 8286 ins_pipe(ialu_reg_mem); 8287 %} 8288 8289 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8290 %{ 8291 match(Set dst (XorI (AddI src minus_1) src)); 8292 predicate(UseBMI1Instructions); 8293 effect(KILL cr); 8294 8295 format %{ "BLSMSKL $dst, $src" %} 8296 8297 ins_encode %{ 8298 __ blsmskl($dst$$Register, $src$$Register); 8299 %} 8300 8301 ins_pipe(ialu_reg); 8302 %} 8303 8304 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8305 %{ 8306 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); 8307 predicate(UseBMI1Instructions); 8308 effect(KILL cr); 8309 8310 ins_cost(125); 8311 format %{ "BLSMSKL $dst, $src" %} 8312 8313 ins_encode %{ 8314 __ blsmskl($dst$$Register, $src$$Address); 8315 %} 8316 8317 ins_pipe(ialu_reg_mem); 8318 %} 8319 8320 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8321 %{ 8322 match(Set dst (AndI (AddI src minus_1) src) ); 8323 predicate(UseBMI1Instructions); 8324 effect(KILL cr); 8325 8326 format %{ "BLSRL $dst, $src" %} 8327 8328 ins_encode %{ 8329 __ blsrl($dst$$Register, $src$$Register); 8330 %} 8331 8332 ins_pipe(ialu_reg); 8333 %} 8334 8335 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8336 %{ 8337 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); 8338 predicate(UseBMI1Instructions); 8339 effect(KILL cr); 8340 8341 ins_cost(125); 8342 format %{ "BLSRL $dst, $src" %} 8343 8344 ins_encode %{ 8345 __ blsrl($dst$$Register, $src$$Address); 8346 %} 8347 8348 ins_pipe(ialu_reg_mem); 8349 %} 8350 8351 // Or Instructions 8352 // Or Register with Register 8353 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8354 match(Set dst (OrI dst src)); 8355 effect(KILL cr); 8356 8357 size(2); 8358 format %{ "OR $dst,$src" %} 8359 opcode(0x0B); 8360 ins_encode( OpcP, RegReg( dst, src) ); 8361 ins_pipe( ialu_reg_reg ); 8362 %} 8363 8364 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8365 match(Set dst (OrI dst (CastP2X src))); 8366 effect(KILL cr); 8367 8368 size(2); 8369 format %{ "OR $dst,$src" %} 8370 opcode(0x0B); 8371 ins_encode( OpcP, RegReg( dst, src) ); 8372 ins_pipe( ialu_reg_reg ); 8373 %} 8374 8375 8376 // Or Register with Immediate 8377 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8378 match(Set dst (OrI dst src)); 8379 effect(KILL cr); 8380 8381 format %{ "OR $dst,$src" %} 8382 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8383 // ins_encode( RegImm( dst, src) ); 8384 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8385 ins_pipe( ialu_reg ); 8386 %} 8387 8388 // Or Register with Memory 8389 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8390 match(Set dst (OrI dst (LoadI src))); 8391 effect(KILL cr); 8392 8393 ins_cost(150); 8394 format %{ "OR $dst,$src" %} 8395 opcode(0x0B); 8396 ins_encode( OpcP, RegMem( dst, src) ); 8397 ins_pipe( ialu_reg_mem ); 8398 %} 8399 8400 // Or Memory with Register 8401 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8402 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8403 effect(KILL cr); 8404 8405 ins_cost(150); 8406 format %{ "OR $dst,$src" %} 8407 opcode(0x09); /* Opcode 09 /r */ 8408 ins_encode( OpcP, RegMem( src, dst ) ); 8409 ins_pipe( ialu_mem_reg ); 8410 %} 8411 8412 // Or Memory with Immediate 8413 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8414 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8415 effect(KILL cr); 8416 8417 ins_cost(125); 8418 format %{ "OR $dst,$src" %} 8419 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8420 // ins_encode( MemImm( dst, src) ); 8421 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8422 ins_pipe( ialu_mem_imm );
// (closes orI_mem_imm) -- the ROL/ROR "expand" instructs below carry no
// match rule; they are only emitted through the expand %{ %} blocks of the
// rotate patterns that follow (rolI_eReg_i1 etc.), which recognize the
// (x << n) | (x >>> 32-n) idioms.
 8423 %} 8424 8425 // ROL/ROR 8426 // ROL expand 8427 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8428 effect(USE_DEF dst, USE shift, KILL cr); 8429 8430 format %{ "ROL $dst, $shift" %} 8431 opcode(0xD1, 0x0); /* Opcode D1 /0 */ 8432 ins_encode( OpcP, RegOpc( dst )); 8433 ins_pipe( ialu_reg ); 8434 %} 8435 8436 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8437 effect(USE_DEF dst, USE shift, KILL cr); 8438 8439 format %{ "ROL $dst, $shift" %} 8440 opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 8441 ins_encode( RegOpcImm(dst, shift) ); 8442 ins_pipe(ialu_reg); 8443 %} 8444 8445 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8446 effect(USE_DEF dst, USE shift, KILL cr); 8447 8448 format %{ "ROL $dst, $shift" %} 8449 opcode(0xD3, 0x0); /* Opcode D3 /0 */ 8450 ins_encode(OpcP, RegOpc(dst)); 8451 ins_pipe( ialu_reg_reg ); 8452 %} 8453 // end of ROL expand 8454 8455 // ROL 32bit by one once 8456 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 8457 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8458 8459 expand %{ 8460 rolI_eReg_imm1(dst, lshift, cr); 8461 %} 8462 %} 8463 8464 // ROL 32bit var by imm8 once 8465 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 8466 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8467 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8468 8469 expand %{ 8470 rolI_eReg_imm8(dst, lshift, cr); 8471 %} 8472 %} 8473 8474 // ROL 32bit var by var once 8475 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ 8476 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 8477 8478 expand %{ 8479 rolI_eReg_CL(dst, shift, cr); 8480 %} 8481 %} 8482 8483 // ROL 32bit var by var once 8484 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8485 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift)))); 8486 8487 expand %{ 8488 rolI_eReg_CL(dst, shift, cr); 8489 %} 8490 %} 8491 8492 // ROR expand 8493 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8494 effect(USE_DEF dst, USE shift, KILL cr); 8495 8496 format %{ "ROR $dst, $shift" %} 8497 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8498 ins_encode( OpcP, RegOpc( dst ) ); 8499 ins_pipe( ialu_reg ); 8500 %} 8501 8502 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8503 effect (USE_DEF dst, USE shift, KILL cr); 8504 8505 format %{ "ROR $dst, $shift" %} 8506 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8507 ins_encode( RegOpcImm(dst, shift) ); 8508 ins_pipe( ialu_reg ); 8509 %} 8510 8511 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8512 effect(USE_DEF dst, USE shift, KILL cr); 8513 8514 format %{ "ROR $dst, $shift" %} 8515 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8516 ins_encode(OpcP, RegOpc(dst)); 8517 ins_pipe( ialu_reg_reg ); 8518 %} 8519 // end of ROR expand 8520 8521 // ROR right once 8522 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8523 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8524 8525 expand %{ 8526 rorI_eReg_imm1(dst, rshift, cr); 8527 %} 8528 %} 8529 8530 // ROR 32bit by immI8 once 8531 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8532 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8533 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8534 8535 expand %{ 8536 rorI_eReg_imm8(dst, rshift, cr); 8537 %} 8538 %} 8539 8540 // ROR 32bit var by var once 8541 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ 8542 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8543 8544 expand %{ 8545 rorI_eReg_CL(dst, shift, cr); 8546 %} 8547 %} 8548 8549 // ROR 32bit var by var once 8550 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8551 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8552 8553 expand %{ 8554 rorI_eReg_CL(dst, shift, cr); 8555 %} 8556 %} 8557 8558 // Xor Instructions 8559 // Xor Register with Register 8560 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8561 match(Set dst (XorI dst src)); 8562 effect(KILL cr); 8563 8564 size(2); 8565 format %{ "XOR $dst,$src" %} 8566 opcode(0x33); 8567 ins_encode( OpcP, RegReg( dst, src) ); 8568 ins_pipe( ialu_reg_reg ); 8569 %} 8570 8571 // Xor Register with Immediate -1 8572 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8573 match(Set dst (XorI dst imm)); 8574 8575 size(2); 8576 format %{ "NOT $dst" %} 8577 ins_encode %{ 8578 __ notl($dst$$Register); 8579 %} 8580 ins_pipe( ialu_reg ); 8581 %} 8582 8583 // Xor Register with Immediate 8584 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8585 match(Set dst (XorI dst src)); 8586 effect(KILL cr); 8587 8588 format %{ "XOR $dst,$src" %} 8589 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8590 // ins_encode( RegImm( dst, src) ); 8591 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8592 ins_pipe( ialu_reg ); 8593 %} 8594 8595 // Xor Register with Memory 8596 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8597 match(Set dst (XorI dst (LoadI src))); 8598 effect(KILL cr); 8599 8600 ins_cost(150); 8601 format %{ "XOR $dst,$src" %} 8602 opcode(0x33); 8603 ins_encode( OpcP, RegMem(dst, src) ); 8604 ins_pipe( ialu_reg_mem ); 8605 %} 8606 8607 // Xor Memory with Register 8608 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8609 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8610 effect(KILL cr); 8611 8612 ins_cost(150); 8613 format %{ "XOR $dst,$src" %} 8614 opcode(0x31); /* Opcode 31 /r */ 8615 ins_encode( OpcP, RegMem( src, dst ) ); 8616 ins_pipe( ialu_mem_reg ); 8617 %} 8618 8619 // Xor Memory with Immediate 8620 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8621 match(Set
dst (StoreI dst (XorI (LoadI dst) src))); 8622 effect(KILL cr); 8623 8624 ins_cost(125); 8625 format %{ "XOR $dst,$src" %} 8626 opcode(0x81,0x6); /* Opcode 81 /6 id */ 8627 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8628 ins_pipe( ialu_mem_imm ); 8629 %} 8630 8631 //----------Convert Int to Boolean--------------------------------------------- 8632 8633 instruct movI_nocopy(rRegI dst, rRegI src) %{ 8634 effect( DEF dst, USE src ); 8635 format %{ "MOV $dst,$src" %} 8636 ins_encode( enc_Copy( dst, src) ); 8637 ins_pipe( ialu_reg_reg ); 8638 %} 8639 8640 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8641 effect( USE_DEF dst, USE src, KILL cr ); 8642 8643 size(4); 8644 format %{ "NEG $dst\n\t" 8645 "ADC $dst,$src" %} 8646 ins_encode( neg_reg(dst), 8647 OpcRegReg(0x13,dst,src) ); 8648 ins_pipe( ialu_reg_reg_long ); 8649 %} 8650 8651 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8652 match(Set dst (Conv2B src)); 8653 8654 expand %{ 8655 movI_nocopy(dst,src); 8656 ci2b(dst,src,cr); 8657 %} 8658 %} 8659 8660 instruct movP_nocopy(rRegI dst, eRegP src) %{ 8661 effect( DEF dst, USE src ); 8662 format %{ "MOV $dst,$src" %} 8663 ins_encode( enc_Copy( dst, src) ); 8664 ins_pipe( ialu_reg_reg ); 8665 %} 8666 8667 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8668 effect( USE_DEF dst, USE src, KILL cr ); 8669 format %{ "NEG $dst\n\t" 8670 "ADC $dst,$src" %} 8671 ins_encode( neg_reg(dst), 8672 OpcRegReg(0x13,dst,src) ); 8673 ins_pipe( ialu_reg_reg_long ); 8674 %} 8675 8676 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8677 match(Set dst (Conv2B src)); 8678 8679 expand %{ 8680 movP_nocopy(dst,src); 8681 cp2b(dst,src,cr); 8682 %} 8683 %} 8684 8685 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ 8686 match(Set dst (CmpLTMask p q)); 8687 effect(KILL cr); 8688 ins_cost(400); 8689 8690 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination 8691 format %{ "XOR $dst,$dst\n\t" 8692 "CMP $p,$q\n\t" 8693 "SETlt $dst\n\t" 8694 "NEG $dst" %} 8695 ins_encode %{ 8696 Register Rp = $p$$Register; 8697 Register Rq = $q$$Register; 8698 Register Rd = $dst$$Register; 8699 Label done;
// NOTE(review): 'done' is declared but never bound or jumped to in this
// encode (the sequence below is branch-free: xor/cmp/setb/neg). It looks
// like a leftover from an earlier branch-based version -- confirm before
// removing.
 8700 __ xorl(Rd, Rd); 8701 __ cmpl(Rp, Rq); 8702 __ setb(Assembler::less, Rd); 8703 __ negl(Rd); 8704 %} 8705 8706 ins_pipe(pipe_slow); 8707 %} 8708 8709 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{ 8710 match(Set dst (CmpLTMask dst zero)); 8711 effect(DEF dst, KILL cr); 8712 ins_cost(100); 8713 8714 format %{ "SAR $dst,31\t# cmpLTMask0" %} 8715 ins_encode %{ 8716 __ sarl($dst$$Register, 31); 8717 %} 8718 ins_pipe(ialu_reg); 8719 %} 8720 8721 /* better to save a register than avoid a branch */ 8722 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8723 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); 8724 effect(KILL cr); 8725 ins_cost(400); 8726 format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t" 8727 "JGE done\n\t" 8728 "ADD $p,$y\n" 8729 "done: " %} 8730 ins_encode %{ 8731 Register Rp = $p$$Register; 8732 Register Rq = $q$$Register; 8733 Register Ry = $y$$Register; 8734 Label done; 8735 __ subl(Rp, Rq); 8736 __ jccb(Assembler::greaterEqual, done); 8737 __ addl(Rp, Ry); 8738 __ bind(done); 8739 %} 8740 8741 ins_pipe(pipe_cmplt); 8742 %} 8743 8744 /* better to save a register than avoid a branch */ 8745 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8746 match(Set y (AndI (CmpLTMask p q) y)); 8747 effect(KILL cr); 8748 8749 ins_cost(300); 8750 8751 format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t" 8752 "JLT done\n\t" 8753 "XORL $y, $y\n" 8754 "done: " %} 8755 ins_encode %{ 8756 Register Rp = $p$$Register; 8757 Register Rq = $q$$Register; 8758 Register Ry = $y$$Register; 8759 Label done; 8760 __ cmpl(Rp, Rq); 8761 __ jccb(Assembler::less, done); 8762 __ xorl(Ry, Ry); 8763 __ bind(done); 8764 %} 8765 8766 ins_pipe(pipe_cmplt); 8767 %} 8768 8769 /* If I enable this, I encourage spilling in the inner loop of compress.
 8770 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ 8771 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); 8772 */ 8773 //----------Overflow Math Instructions----------------------------------------- 8774 8775 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8776 %{ 8777 match(Set cr (OverflowAddI op1 op2)); 8778 effect(DEF cr, USE_KILL op1, USE op2); 8779 8780 format %{ "ADD $op1, $op2\t# overflow check int" %} 8781 8782 ins_encode %{ 8783 __ addl($op1$$Register, $op2$$Register); 8784 %} 8785 ins_pipe(ialu_reg_reg); 8786 %} 8787 8788 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) 8789 %{ 8790 match(Set cr (OverflowAddI op1 op2)); 8791 effect(DEF cr, USE_KILL op1, USE op2); 8792 8793 format %{ "ADD $op1, $op2\t# overflow check int" %} 8794 8795 ins_encode %{ 8796 __ addl($op1$$Register, $op2$$constant); 8797 %} 8798 ins_pipe(ialu_reg_reg); 8799 %} 8800 8801 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) 8802 %{ 8803 match(Set cr (OverflowSubI op1 op2)); 8804 8805 format %{ "CMP $op1, $op2\t# overflow check int" %} 8806 ins_encode %{ 8807 __ cmpl($op1$$Register, $op2$$Register); 8808 %} 8809 ins_pipe(ialu_reg_reg); 8810 %} 8811 8812 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) 8813 %{ 8814 match(Set cr (OverflowSubI op1 op2)); 8815 8816 format %{ "CMP $op1, $op2\t# overflow check int" %} 8817 ins_encode %{ 8818 __ cmpl($op1$$Register, $op2$$constant); 8819 %} 8820 ins_pipe(ialu_reg_reg); 8821 %} 8822 8823 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2) 8824 %{ 8825 match(Set cr (OverflowSubI zero op2)); 8826 effect(DEF cr, USE_KILL op2); 8827 8828 format %{ "NEG $op2\t# overflow check int" %} 8829 ins_encode %{ 8830 __ negl($op2$$Register); 8831 %} 8832 ins_pipe(ialu_reg_reg); 8833 %} 8834 8835 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8836 %{ 8837 match(Set cr (OverflowMulI op1 op2)); 8838
// The flags result of the IMUL below is the overflow check (see
// "# overflow check int" in the format); op1 is clobbered with the
// product, hence USE_KILL.
 effect(DEF cr, USE_KILL op1, USE op2); 8839 8840 format %{ "IMUL $op1, $op2\t# overflow check int" %} 8841 ins_encode %{ 8842 __ imull($op1$$Register, $op2$$Register); 8843 %} 8844 ins_pipe(ialu_reg_reg_alu0); 8845 %} 8846 8847 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) 8848 %{ 8849 match(Set cr (OverflowMulI op1 op2)); 8850 effect(DEF cr, TEMP tmp, USE op1, USE op2); 8851 8852 format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} 8853 ins_encode %{ 8854 __ imull($tmp$$Register, $op1$$Register, $op2$$constant); 8855 %} 8856 ins_pipe(ialu_reg_reg_alu0); 8857 %} 8858 8859 // Integer Absolute Instructions 8860 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr) 8861 %{ 8862 match(Set dst (AbsI src)); 8863 effect(TEMP dst, TEMP tmp, KILL cr); 8864 format %{ "movl $tmp, $src\n\t" 8865 "sarl $tmp, 31\n\t" 8866 "movl $dst, $src\n\t" 8867 "xorl $dst, $tmp\n\t" 8868 "subl $dst, $tmp\n" 8869 %} 8870 ins_encode %{ 8871 __ movl($tmp$$Register, $src$$Register); 8872 __ sarl($tmp$$Register, 31); 8873 __ movl($dst$$Register, $src$$Register); 8874 __ xorl($dst$$Register, $tmp$$Register); 8875 __ subl($dst$$Register, $tmp$$Register); 8876 %} 8877 8878 ins_pipe(ialu_reg_reg); 8879 %} 8880 8881 //----------Long Instructions------------------------------------------------ 8882 // Add Long Register with Register 8883 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8884 match(Set dst (AddL dst src)); 8885 effect(KILL cr); 8886 ins_cost(200); 8887 format %{ "ADD $dst.lo,$src.lo\n\t" 8888 "ADC $dst.hi,$src.hi" %} 8889 opcode(0x03, 0x13); 8890 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8891 ins_pipe( ialu_reg_reg_long ); 8892 %} 8893 8894 // Add Long Register with Immediate 8895 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8896 match(Set dst (AddL dst src)); 8897 effect(KILL cr); 8898 format %{ "ADD $dst.lo,$src.lo\n\t" 8899 "ADC $dst.hi,$src.hi" %} 8900 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2
*/ 8901 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8902 ins_pipe( ialu_reg_long ); 8903 %} 8904 8905 // Add Long Register with Memory 8906 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8907 match(Set dst (AddL dst (LoadL mem))); 8908 effect(KILL cr); 8909 ins_cost(125); 8910 format %{ "ADD $dst.lo,$mem\n\t" 8911 "ADC $dst.hi,$mem+4" %} 8912 opcode(0x03, 0x13); 8913 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8914 ins_pipe( ialu_reg_long_mem ); 8915 %} 8916 8917 // Subtract Long Register with Register. 8918 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8919 match(Set dst (SubL dst src)); 8920 effect(KILL cr); 8921 ins_cost(200); 8922 format %{ "SUB $dst.lo,$src.lo\n\t" 8923 "SBB $dst.hi,$src.hi" %} 8924 opcode(0x2B, 0x1B); 8925 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8926 ins_pipe( ialu_reg_reg_long ); 8927 %} 8928 8929 // Subtract Long Register with Immediate 8930 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8931 match(Set dst (SubL dst src)); 8932 effect(KILL cr); 8933 format %{ "SUB $dst.lo,$src.lo\n\t" 8934 "SBB $dst.hi,$src.hi" %} 8935 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8936 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8937 ins_pipe( ialu_reg_long ); 8938 %} 8939 8940 // Subtract Long Register with Memory 8941 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8942 match(Set dst (SubL dst (LoadL mem))); 8943 effect(KILL cr); 8944 ins_cost(125); 8945 format %{ "SUB $dst.lo,$mem\n\t" 8946 "SBB $dst.hi,$mem+4" %} 8947 opcode(0x2B, 0x1B); 8948 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8949 ins_pipe( ialu_reg_long_mem ); 8950 %} 8951 8952 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8953 match(Set dst (SubL zero dst)); 8954 effect(KILL cr); 8955 ins_cost(300); 8956 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8957 ins_encode( 
neg_long(dst) ); 8958 ins_pipe( ialu_reg_reg_long ); 8959 %} 8960 8961 // And Long Register with Register 8962 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8963 match(Set dst (AndL dst src)); 8964 effect(KILL cr); 8965 format %{ "AND $dst.lo,$src.lo\n\t" 8966 "AND $dst.hi,$src.hi" %} 8967 opcode(0x23,0x23); 8968 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8969 ins_pipe( ialu_reg_reg_long ); 8970 %} 8971 8972 // And Long Register with Immediate 8973 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8974 match(Set dst (AndL dst src)); 8975 effect(KILL cr); 8976 format %{ "AND $dst.lo,$src.lo\n\t" 8977 "AND $dst.hi,$src.hi" %} 8978 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8979 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8980 ins_pipe( ialu_reg_long ); 8981 %} 8982 8983 // And Long Register with Memory 8984 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8985 match(Set dst (AndL dst (LoadL mem))); 8986 effect(KILL cr); 8987 ins_cost(125); 8988 format %{ "AND $dst.lo,$mem\n\t" 8989 "AND $dst.hi,$mem+4" %} 8990 opcode(0x23, 0x23); 8991 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8992 ins_pipe( ialu_reg_long_mem ); 8993 %} 8994 8995 // BMI1 instructions 8996 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 8997 match(Set dst (AndL (XorL src1 minus_1) src2)); 8998 predicate(UseBMI1Instructions); 8999 effect(KILL cr, TEMP dst); 9000 9001 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 9002 "ANDNL $dst.hi, $src1.hi, $src2.hi" 9003 %} 9004 9005 ins_encode %{ 9006 Register Rdst = $dst$$Register; 9007 Register Rsrc1 = $src1$$Register; 9008 Register Rsrc2 = $src2$$Register; 9009 __ andnl(Rdst, Rsrc1, Rsrc2); 9010 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 9011 %} 9012 ins_pipe(ialu_reg_reg_long); 9013 %} 9014 9015 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, 
immL_M1 minus_1, eFlagsReg cr) %{ 9016 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 9017 predicate(UseBMI1Instructions); 9018 effect(KILL cr, TEMP dst); 9019 9020 ins_cost(125); 9021 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 9022 "ANDNL $dst.hi, $src1.hi, $src2+4" 9023 %} 9024 9025 ins_encode %{ 9026 Register Rdst = $dst$$Register; 9027 Register Rsrc1 = $src1$$Register; 9028 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 9029 9030 __ andnl(Rdst, Rsrc1, $src2$$Address); 9031 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); 9032 %} 9033 ins_pipe(ialu_reg_mem); 9034 %} 9035 9036 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 9037 match(Set dst (AndL (SubL imm_zero src) src)); 9038 predicate(UseBMI1Instructions); 9039 effect(KILL cr, TEMP dst); 9040 9041 format %{ "MOVL $dst.hi, 0\n\t" 9042 "BLSIL $dst.lo, $src.lo\n\t" 9043 "JNZ done\n\t" 9044 "BLSIL $dst.hi, $src.hi\n" 9045 "done:" 9046 %} 9047 9048 ins_encode %{ 9049 Label done; 9050 Register Rdst = $dst$$Register; 9051 Register Rsrc = $src$$Register; 9052 __ movl(HIGH_FROM_LOW(Rdst), 0); 9053 __ blsil(Rdst, Rsrc); 9054 __ jccb(Assembler::notZero, done); 9055 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9056 __ bind(done); 9057 %} 9058 ins_pipe(ialu_reg); 9059 %} 9060 9061 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 9062 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 9063 predicate(UseBMI1Instructions); 9064 effect(KILL cr, TEMP dst); 9065 9066 ins_cost(125); 9067 format %{ "MOVL $dst.hi, 0\n\t" 9068 "BLSIL $dst.lo, $src\n\t" 9069 "JNZ done\n\t" 9070 "BLSIL $dst.hi, $src+4\n" 9071 "done:" 9072 %} 9073 9074 ins_encode %{ 9075 Label done; 9076 Register Rdst = $dst$$Register; 9077 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9078 9079 __ movl(HIGH_FROM_LOW(Rdst), 0); 9080 
__ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to (and including) the lowest set bit. The carry flag
// from the low-half BLSMSK tells whether the low half contained a set
// bit; if it did not (CF set from borrow semantics clear), the high
// half must also be processed.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

// BLSMSK with the operand loaded from memory.
instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset (clear) the lowest set bit. The high half is first copied
// unchanged; it is only rewritten when the low-half BLSR signals (via
// the carry flag) that the borrow propagates into the high word.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

// BLSR with the operand loaded from memory.
instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ),
Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// XOR with all-ones is a bitwise NOT; NOT does not touch EFLAGS, so no
// flags effect is declared here.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
// Small constant left shifts are strength-reduced to ADD/ADC chains:
// the low half is doubled and the carry is propagated into the high half.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left
// Shift Left Long by 1-31
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
// Shifts of >= 32 move the low word into the high word and clear the low word.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right (arithmetic) Long by 1-31
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right (arithmetic) Long by 32-63
// The high word is filled with copies of the sign bit (SAR by 31).
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}


instruct subDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE <=1);
match(Set dst (RoundDouble (SubD src1 src2))); 9659 ins_cost(250); 9660 9661 format %{ "FLD $src2\n\t" 9662 "DSUB ST,$src1\n\t" 9663 "FSTP_D $dst\t# D-round" %} 9664 opcode(0xD8, 0x5); 9665 ins_encode( Push_Reg_DPR(src2), 9666 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9667 ins_pipe( fpu_mem_reg_reg ); 9668 %} 9669 9670 9671 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9672 predicate (UseSSE <=1); 9673 match(Set dst (SubD dst (LoadD src))); 9674 ins_cost(150); 9675 9676 format %{ "FLD $src\n\t" 9677 "DSUBp $dst,ST" %} 9678 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9679 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9680 OpcP, RegOpc(dst) ); 9681 ins_pipe( fpu_reg_mem ); 9682 %} 9683 9684 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9685 predicate (UseSSE<=1); 9686 match(Set dst (AbsD src)); 9687 ins_cost(100); 9688 format %{ "FABS" %} 9689 opcode(0xE1, 0xD9); 9690 ins_encode( OpcS, OpcP ); 9691 ins_pipe( fpu_reg_reg ); 9692 %} 9693 9694 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9695 predicate(UseSSE<=1); 9696 match(Set dst (NegD src)); 9697 ins_cost(100); 9698 format %{ "FCHS" %} 9699 opcode(0xE0, 0xD9); 9700 ins_encode( OpcS, OpcP ); 9701 ins_pipe( fpu_reg_reg ); 9702 %} 9703 9704 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9705 predicate(UseSSE<=1); 9706 match(Set dst (AddD dst src)); 9707 format %{ "FLD $src\n\t" 9708 "DADD $dst,ST" %} 9709 size(4); 9710 ins_cost(150); 9711 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9712 ins_encode( Push_Reg_DPR(src), 9713 OpcP, RegOpc(dst) ); 9714 ins_pipe( fpu_reg_reg ); 9715 %} 9716 9717 9718 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9719 predicate(UseSSE<=1); 9720 match(Set dst (RoundDouble (AddD src1 src2))); 9721 ins_cost(250); 9722 9723 format %{ "FLD $src2\n\t" 9724 "DADD ST,$src1\n\t" 9725 "FSTP_D $dst\t# D-round" %} 9726 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9727 ins_encode( Push_Reg_DPR(src2), 9728 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9729 ins_pipe( 
fpu_mem_reg_reg ); 9730 %} 9731 9732 9733 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9734 predicate(UseSSE<=1); 9735 match(Set dst (AddD dst (LoadD src))); 9736 ins_cost(150); 9737 9738 format %{ "FLD $src\n\t" 9739 "DADDp $dst,ST" %} 9740 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9741 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9742 OpcP, RegOpc(dst) ); 9743 ins_pipe( fpu_reg_mem ); 9744 %} 9745 9746 // add-to-memory 9747 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9748 predicate(UseSSE<=1); 9749 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9750 ins_cost(150); 9751 9752 format %{ "FLD_D $dst\n\t" 9753 "DADD ST,$src\n\t" 9754 "FST_D $dst" %} 9755 opcode(0xDD, 0x0); 9756 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9757 Opcode(0xD8), RegOpc(src), 9758 set_instruction_start, 9759 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9760 ins_pipe( fpu_reg_mem ); 9761 %} 9762 9763 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9764 predicate(UseSSE<=1); 9765 match(Set dst (AddD dst con)); 9766 ins_cost(125); 9767 format %{ "FLD1\n\t" 9768 "DADDp $dst,ST" %} 9769 ins_encode %{ 9770 __ fld1(); 9771 __ faddp($dst$$reg); 9772 %} 9773 ins_pipe(fpu_reg); 9774 %} 9775 9776 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9777 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9778 match(Set dst (AddD dst con)); 9779 ins_cost(200); 9780 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9781 "DADDp $dst,ST" %} 9782 ins_encode %{ 9783 __ fld_d($constantaddress($con)); 9784 __ faddp($dst$$reg); 9785 %} 9786 ins_pipe(fpu_reg_mem); 9787 %} 9788 9789 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9790 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9791 match(Set dst (RoundDouble (AddD src con))); 9792 ins_cost(200); 9793 format %{ "FLD_D [$constantaddress]\t# load from 
constant table: double=$con\n\t" 9794 "DADD ST,$src\n\t" 9795 "FSTP_D $dst\t# D-round" %} 9796 ins_encode %{ 9797 __ fld_d($constantaddress($con)); 9798 __ fadd($src$$reg); 9799 __ fstp_d(Address(rsp, $dst$$disp)); 9800 %} 9801 ins_pipe(fpu_mem_reg_con); 9802 %} 9803 9804 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9805 predicate(UseSSE<=1); 9806 match(Set dst (MulD dst src)); 9807 format %{ "FLD $src\n\t" 9808 "DMULp $dst,ST" %} 9809 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9810 ins_cost(150); 9811 ins_encode( Push_Reg_DPR(src), 9812 OpcP, RegOpc(dst) ); 9813 ins_pipe( fpu_reg_reg ); 9814 %} 9815 9816 // Strict FP instruction biases argument before multiply then 9817 // biases result to avoid double rounding of subnormals. 9818 // 9819 // scale arg1 by multiplying arg1 by 2^(-15360) 9820 // load arg2 9821 // multiply scaled arg1 by arg2 9822 // rescale product by 2^(15360) 9823 // 9824 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9825 predicate( UseSSE<=1 && Compile::current()->has_method() ); 9826 match(Set dst (MulD dst src)); 9827 ins_cost(1); // Select this instruction for all FP double multiplies 9828 9829 format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t" 9830 "DMULp $dst,ST\n\t" 9831 "FLD $src\n\t" 9832 "DMULp $dst,ST\n\t" 9833 "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t" 9834 "DMULp $dst,ST\n\t" %} 9835 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9836 ins_encode( strictfp_bias1(dst), 9837 Push_Reg_DPR(src), 9838 OpcP, RegOpc(dst), 9839 strictfp_bias2(dst) ); 9840 ins_pipe( fpu_reg_reg ); 9841 %} 9842 9843 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9844 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9845 match(Set dst (MulD dst con)); 9846 ins_cost(200); 9847 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9848 "DMULp $dst,ST" %} 9849 ins_encode %{ 9850 __ fld_d($constantaddress($con)); 9851 __ fmulp($dst$$reg); 9852 %} 9853 
ins_pipe(fpu_reg_mem);
%}


instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}


// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // NOTE(review): the original carried two predicate statements; the
  // first (UseSSE<=1) is implied by this one, so only the stronger
  // predicate is kept — consistent with strictfp_mulDPR_reg above.
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(01);

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
            "\tMOVSD [ESP+0],$src1\n"
            "\tFLD_D [ESP+0]\n"
            "\tMOVSD [ESP+0],$src0\n"
            "\tFLD_D [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_D [ESP+0]\n"
            "\tMOVSD $dst,[ESP+0]\n"
            "\tADD ESP,8\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

instruct atanDPR_reg(regDPR dst,
regDPR src) %{ 10001 predicate (UseSSE<=1); 10002 match(Set dst(AtanD dst src)); 10003 format %{ "DATA $dst,$src" %} 10004 opcode(0xD9, 0xF3); 10005 ins_encode( Push_Reg_DPR(src), 10006 OpcP, OpcS, RegOpc(dst) ); 10007 ins_pipe( pipe_slow ); 10008 %} 10009 10010 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 10011 predicate (UseSSE>=2); 10012 match(Set dst(AtanD dst src)); 10013 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 10014 format %{ "DATA $dst,$src" %} 10015 opcode(0xD9, 0xF3); 10016 ins_encode( Push_SrcD(src), 10017 OpcP, OpcS, Push_ResultD(dst) ); 10018 ins_pipe( pipe_slow ); 10019 %} 10020 10021 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 10022 predicate (UseSSE<=1); 10023 match(Set dst (SqrtD src)); 10024 format %{ "DSQRT $dst,$src" %} 10025 opcode(0xFA, 0xD9); 10026 ins_encode( Push_Reg_DPR(src), 10027 OpcS, OpcP, Pop_Reg_DPR(dst) ); 10028 ins_pipe( pipe_slow ); 10029 %} 10030 10031 //-------------Float Instructions------------------------------- 10032 // Float Math 10033 10034 // Code for float compare: 10035 // fcompp(); 10036 // fwait(); fnstsw_ax(); 10037 // sahf(); 10038 // movl(dst, unordered_result); 10039 // jcc(Assembler::parity, exit); 10040 // movl(dst, less_result); 10041 // jcc(Assembler::below, exit); 10042 // movl(dst, equal_result); 10043 // jcc(Assembler::equal, exit); 10044 // movl(dst, greater_result); 10045 // exit: 10046 10047 // P6 version of float compare, sets condition codes in EFLAGS 10048 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10049 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10050 match(Set cr (CmpF src1 src2)); 10051 effect(KILL rax); 10052 ins_cost(150); 10053 format %{ "FLD $src1\n\t" 10054 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10055 "JNP exit\n\t" 10056 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10057 "SAHF\n" 10058 "exit:\tNOP // avoid branch to branch" %} 10059 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10060 ins_encode( 
            Push_Reg_DPR(src1),
            OpcP, RegOpc(src2),
            cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// As above, but targets eFlagsRegUCF (a condition-code user that tolerates
// the raw unordered-compare flags), so no NaN fixup pass is emitted --
// hence the lower cost.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
// Pre-P6 x87 float compare: FCOMp plus FNSTSW/SAHF to copy the FPU
// condition bits into EFLAGS; per the format text, an unordered result
// (NaN, C2 bit 0x400 set) is rewritten so branches treat it as 'less than'.
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);   // FNSTSW AX clobbers EAX
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
// FTST (D9 E4) tests ST(0); CmpF_Result materializes the three-way result
// into dst. EAX is killed for the status-word transfer.
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
// Same as cmpF_reg but with the second operand loaded from memory.
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
// (dst is a float stack slot: popping the x87 result to memory forces
// rounding to single precision.)
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// FABS operates on ST(0) in place; both operands are constrained to the
// regFPR1 class (top of the x87 stack), so no operand encoding is needed.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// FCHS: negate ST(0) in place, same operand constraints as absFPR_reg.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
// Constant operand is fetched from the constant table via $constantaddress.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst
(MulF src1 src2));

  // NOTE(review): the format text shows FSTP_S, but this is the
  // non-rounding variant and the encoding pops into a register
  // (Pop_Reg_FPR), not to memory. Display text only -- verify upstream.
  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
// Constant operand fetched from the constant table via $constantaddress.
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
// (pop to a float stack slot to force single-precision rounding)
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
// Float remainder via the shared double-precision mod helper.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder: bounce the XMM operands through the stack onto the
// x87 stack, iterate FPREM (see format text: JP loop until done), then move
// the result back into an XMM register.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!

// Round an x87 float by storing it to a single-precision stack slot.
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round an x87 double by storing it to a double-precision stack slot.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
// x87 source, XMM destination: store-round through the stack, then MOVSS.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // If src is not already ST(0), load a copy and pop-store;
    // otherwise store straight from the top of the stack.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Widen float to double on the x87 stack (no rounding can occur).
// NOTE(review): the format text says FST_S / '# D-round' for this F->D
// widening; display text only -- verify upstream.
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// XMM float source, x87 double destination: bounce through the stack.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// SSE2 fast path: CVTTSD2SI; the 0x80000000 sentinel (overflow/NaN) routes
// to the d2i_wrapper stub for the exact Java corner-case semantics.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: reload the double onto the x87 stack for the wrapper stub.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    // Truncate (round-toward-zero) for Java semantics.
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x80000000:00000000 is the FIST overflow/NaN sentinel -> slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: push the float onto the x87 stack and call the shared
    // d2i wrapper stub (it handles the float input too).
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    // Truncate (round-toward-zero) for Java semantics.
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x80000000:00000000 is the FIST overflow/NaN sentinel -> slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Integer-to-double on the x87 stack (FILD from a spilled int).
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
11034 ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); 11035 ins_pipe( fpu_reg_mem ); 11036 %} 11037 11038 instruct convI2D_reg(regD dst, rRegI src) %{ 11039 predicate( UseSSE>=2 && !UseXmmI2D ); 11040 match(Set dst (ConvI2D src)); 11041 format %{ "CVTSI2SD $dst,$src" %} 11042 ins_encode %{ 11043 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); 11044 %} 11045 ins_pipe( pipe_slow ); 11046 %} 11047 11048 instruct convI2D_mem(regD dst, memory mem) %{ 11049 predicate( UseSSE>=2 ); 11050 match(Set dst (ConvI2D (LoadI mem))); 11051 format %{ "CVTSI2SD $dst,$mem" %} 11052 ins_encode %{ 11053 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); 11054 %} 11055 ins_pipe( pipe_slow ); 11056 %} 11057 11058 instruct convXI2D_reg(regD dst, rRegI src) 11059 %{ 11060 predicate( UseSSE>=2 && UseXmmI2D ); 11061 match(Set dst (ConvI2D src)); 11062 11063 format %{ "MOVD $dst,$src\n\t" 11064 "CVTDQ2PD $dst,$dst\t# i2d" %} 11065 ins_encode %{ 11066 __ movdl($dst$$XMMRegister, $src$$Register); 11067 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); 11068 %} 11069 ins_pipe(pipe_slow); // XXX 11070 %} 11071 11072 instruct convI2DPR_mem(regDPR dst, memory mem) %{ 11073 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); 11074 match(Set dst (ConvI2D (LoadI mem))); 11075 format %{ "FILD $mem\n\t" 11076 "FSTP $dst" %} 11077 opcode(0xDB); /* DB /0 */ 11078 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11079 Pop_Reg_DPR(dst)); 11080 ins_pipe( fpu_reg_mem ); 11081 %} 11082 11083 // Convert a byte to a float; no rounding step needed. 
// int (known to be a byte via the AndI-with-255 pattern) -> float on x87;
// a byte always fits exactly, so no rounding step is needed.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// int -> float staying in the XMM domain (MOVD + CVTDQ2PS).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy into both halves, arithmetic-shift the high half.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// long -> double via x87 FILD on the pushed 64-bit value (no-SSE2 path).
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// long -> double: x87 does the conversion, result moved back into XMM.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// long -> float: x87 does the conversion, result moved back into XMM.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// long -> float entirely on the x87 stack (no-SSE path).
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// long -> int is just the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Reinterpret float bits (already spilled to a stack slot) as an int.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// Store an x87 float to a stack slot so its raw bits are visible as an int.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Store an XMM float to a stack slot so its raw bits are visible as an int.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret XMM float bits directly into a GPR (MOVD, no memory round-trip).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret int bits as a float by storing the GPR into the float's stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// Load int bits from a stack slot into an x87 register as a float.
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);  /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load int bits from a stack slot into an XMM register as a float.
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret GPR int bits directly into an XMM register (MOVD).
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret double bits (in a stack slot) as a long register pair.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// Store an x87 double to a stack slot so its raw bits are visible as a long.
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Store an XMM double to a stack slot so its raw bits are visible as a long.
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret XMM double bits as a long register pair without touching memory:
// MOVD takes the low word; PSHUFLW swaps the high word down so a second MOVD
// can fetch it.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret a long register pair as double bits in a stack slot.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// Load long bits from a stack slot into an x87 register as a double.
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);  /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// Load long bits from a stack slot into an XMM register as a double (MOVSD,
// clears the upper half of the XMM register).
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Same as above for CPUs where the partial-register MOVLPD form is preferred.
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret a long register pair as double bits without touching memory:
// MOVD each half into an XMM register, then interleave with PUNPCKLDQ.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//----------------------------- CompressBits/ExpandBits ------------------------

instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src mask));
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    Label exit, partial_result;
    // Extract both upper and lower 32 bits of source into destination register
    // pair in parallel.
    // Merge the results of upper and lower destination registers such that upper destination
    // results are contiguously laid out after the lower destination result.
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
    __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
    __ popcntl($rtmp$$Register, $mask$$Register);
    // Skip merging if bit count of lower mask register is equal to 32 (register size).
    __ cmpl($rtmp$$Register, 32);
    __ jccb(Assembler::equal, exit);
    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift left the contents of upper destination register by true bit count of lower mask register
    // and merge with lower destination register.
    __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ orl($dst$$Register, $rtmp$$Register);
    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    // Zero out upper destination register if true bit count of lower 32 bit mask is zero
    // since contents of upper destination have already been copied to lower destination
    // register.
    __ cmpl($rtmp$$Register, 0);
    __ jccb(Assembler::greater, partial_result);
    __ movl(HIGH_FROM_LOW($dst$$Register), 0);
    __ jmp(exit);
    __ bind(partial_result);
    // Perform right shift over upper destination register to move out bits already copied
    // to lower destination register.
    __ subl($rtmp$$Register, 32);
    __ negl($rtmp$$Register);
    __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ bind(exit);
  %}
  ins_pipe( pipe_slow );
%}

instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src mask));
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    // Extraction operation sequentially reads the bits from source register starting from LSB
    // and lays them out into destination register at bit locations corresponding to true bits
    // in mask register. Thus number of source bits read are equal to combined true bit count
    // of mask register pair.
    Label exit, mask_clipping;
    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
    __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
    __ popcntl($rtmp$$Register, $mask$$Register);
    // If true bit count of lower mask register is 32 then none of bit of lower source register
    // will feed to upper destination register.
    __ cmpl($rtmp$$Register, 32);
    __ jccb(Assembler::equal, exit);
    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift right the contents of lower source register to remove already consumed bits.
    __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
    // Extract the bits from lower source register starting from LSB under the influence
    // of upper mask register.
    __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    __ subl($rtmp$$Register, 32);
    __ negl($rtmp$$Register);
    __ movdl($xtmp$$XMMRegister, $mask$$Register);
    __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
    // Clear the set bits in upper mask register which have been used to extract the contents
    // from lower source register.
    __ bind(mask_clipping);
    __ blsrl($mask$$Register, $mask$$Register);
    __ decrementl($rtmp$$Register, 1);
    __ jccb(Assembler::greater, mask_clipping);
    // Starting from LSB extract the bits from upper source register under the influence of
    // remaining set bits in upper mask register.
    __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
    // Merge the partial results extracted from lower and upper source register bits.
    __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ movdl($mask$$Register, $xtmp$$XMMRegister);
    __ bind(exit);
  %}
  ins_pipe( pipe_slow );
%}

// =======================================================================
// fast clearing of an array
// Small ClearArray non-AVX512.
// Clear a small (non-"large") array with UseAVX <= 2: inline word loop for
// short lengths, REP STOS / XMM loop for longer ones (see clear_mem()).
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large == false, no AVX-512 mask register.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Small ClearArray AVX512 non-constant length.
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large == false, with an AVX-512 mask register for the masked tail.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Large ClearArray non-AVX512.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large == true, no AVX-512 mask register.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Large ClearArray AVX512.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large == true, with an AVX-512 mask register for the masked tail.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Small ClearArray AVX512 constant length.
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
            ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    // Length is a compile-time constant ($cnt$$constant), so clear_mem can
    // emit a fully unrolled sequence.
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// StrComp intrinsic, byte[] vs byte[] (Latin-1/Latin-1), non-AVX512 path.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// StrComp intrinsic, byte[] vs byte[] (Latin-1/Latin-1), AVX512 path.
instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// StrComp intrinsic, char[] vs char[] (UTF-16/UTF-16), non-AVX512 path.
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// StrComp intrinsic, char[] vs char[] (UTF-16/UTF-16), AVX512 path.
instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// StrComp intrinsic, Latin-1 vs UTF-16, non-AVX512 path.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// StrComp intrinsic, Latin-1 vs UTF-16, AVX512 path.
instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// StrComp intrinsic, UTF-16 vs Latin-1, non-AVX512 path. Note the swapped
// operand registers and the swapped argument order in the call below.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // Arguments are intentionally swapped (str2/cnt2 first) for the UL encoding.
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// StrComp intrinsic, UTF-16 vs Latin-1, AVX512 path (same operand swap).
instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // Arguments are intentionally swapped (str2/cnt2 first) for the UL encoding.
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe( pipe_slow );
%}

// fast string equals, AVX512 path.
instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}


// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
11992 __ string_indexof($str1$$Register, $str2$$Register, 11993 $cnt1$$Register, $cnt2$$Register, 11994 icnt2, $result$$Register, 11995 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11996 } 11997 %} 11998 ins_pipe( pipe_slow ); 11999 %} 12000 12001 // fast search of substring with known size. 12002 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 12003 eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 12004 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 12005 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 12006 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 12007 12008 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %} 12009 ins_encode %{ 12010 int icnt2 = (int)$int_cnt2$$constant; 12011 if (icnt2 >= 8) { 12012 // IndexOf for constant substrings with size >= 8 elements 12013 // which don't need to be loaded through stack. 12014 __ string_indexofC8($str1$$Register, $str2$$Register, 12015 $cnt1$$Register, $cnt2$$Register, 12016 icnt2, $result$$Register, 12017 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 12018 } else { 12019 // Small strings are loaded through stack if they cross page boundary. 12020 __ string_indexof($str1$$Register, $str2$$Register, 12021 $cnt1$$Register, $cnt2$$Register, 12022 icnt2, $result$$Register, 12023 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 12024 } 12025 %} 12026 ins_pipe( pipe_slow ); 12027 %} 12028 12029 // fast search of substring with known size. 
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Substring search with a variable (non-constant) substring length:
// passes (-1) where the constant-size variants above pass icnt2.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf of a single char in a UTF-16 string (StrIntrinsicNode::U).
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                             eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf of a single char in a Latin-1 string (StrIntrinsicNode::L).
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// fast array equals
// Shares arrays_equals() with StrEquals above, but with is_array_equ == true;
// the trailing bool selects the element width (false = byte, true = char).
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Count positive bytes in a byte[].  This pair needs BOTH avx512vlbw and
// bmi2 for the evex form, hence the OR'ed negation in this predicate.
instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                         regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
                              regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}


// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                         regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                              regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                             regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
// encode_iso_array() is shared with the ASCII variant below; the trailing
// bool selects ASCII (true) vs ISO-8859-1 (false).
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ASCII
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                            regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                            eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST reg,reg (shorter than CMP with imm 0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): m1 is computed but never referenced below in this block.
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index), noreg);
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Two-jump form: also tests the parity flag, branching (or skipping)
// on PF before the main condition, as the emitted code below shows.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx,
eDIRegP result, immP0 zero ) %{ 12788 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); 12789 effect( KILL rcx, KILL result ); 12790 12791 ins_cost(1000); 12792 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12793 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12794 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12795 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12796 "JNE,s miss\t\t# Missed: flags NZ\n\t" 12797 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t" 12798 "miss:\t" %} 12799 12800 opcode(0x0); // No need to XOR EDI 12801 ins_encode( enc_PartialSubtypeCheck() ); 12802 ins_pipe( pipe_slow ); 12803 %} 12804 12805 // ============================================================================ 12806 // Branch Instructions -- short offset versions 12807 // 12808 // These instructions are used to replace jumps of a long offset (the default 12809 // match) with jumps of a shorter offset. These instructions are all tagged 12810 // with the ins_short_branch attribute, which causes the ADLC to suppress the 12811 // match rules in general matching. Instead, the ADLC generates a conversion 12812 // method in the MachNode which can be used to do in-place replacement of the 12813 // long variant with the shorter variant. The compiler will determine if a 12814 // branch can be taken by the is_short_branch_offset() predicate in the machine 12815 // specific code section of the file. 

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short-branch form of jmpConUCF.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short-branch form of jmpConUCF2: emits one or two 2-byte jumps (size(4)
// reserves room for both) depending on the condition code.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Produces -1/0/+1 in $dst by comparing the high halves signed and, if they
// are equal, the low halves unsigned.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // High halves: signed compare decides unless equal.
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // Low halves: unsigned compare breaks the tie.
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags wrappers: same encodings as the signed LTGE forms above.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LTGE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): the UseSSE guard must parenthesize the whole BoolTest
// disjunction; '&&' binds tighter than '||', so the original unparenthesized
// form let the BoolTest::ge case match regardless of UseSSE, overlapping the
// intended-exclusive FPR/SSE variants. Parenthesized to match the cmovLL/II/PP
// predicates above.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt
                         || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt
                         || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt
                         || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt
                         || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags wrappers: same encodings as the signed EQNE forms above.
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_EQNE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_EQNE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_EQNE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): the UseSSE guard must parenthesize the whole BoolTest
// disjunction; '&&' binds tighter than '||', so the original unparenthesized
// form let the BoolTest::ne case match regardless of UseSSE, overlapping the
// intended-exclusive FPR/SSE variants. Parenthesized to match the cmovLL/II/PP
// predicates above.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq
                         || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq
                         || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq
                         || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq
                         || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le
                                          || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
// Unsigned-flags flavor: same CMoveL, keyed off a CmpUL-produced flags reg.
// Expands into the signed-rule encoding above.
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LEGT(cmp, flags, dst, src);
  %}
%}

// Unsigned-flags flavor of cmovLL_mem_LEGT.
instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);  // CMOVcc r32, r/m32
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above, but the source int is loaded from memory.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags flavor of cmovII_reg_LEGT.
instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LEGT(cmp, flags, dst, src);
  %}
%}

// Unsigned-flags flavor of cmovII_mem_LEGT.
instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LEGT(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 FPU-stack registers, SSE2 off).
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  // Parenthesized so the UseSSE guard covers BOTH BoolTest alternatives,
  // matching the grouping used by the cmovLL/cmovII/cmovPP rules above.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM registers, SSE2 on).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  // Parenthesized so the UseSSE guard covers BOTH BoolTest alternatives.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 FPU-stack registers, SSE off).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  // Parenthesized so the UseSSE guard covers BOTH BoolTest alternatives.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM registers, SSE on).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  // Parenthesized so the UseSSE guard covers BOTH BoolTest alternatives.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
13637 instruct CallDynamicJavaDirect(method meth) %{ 13638 match(CallDynamicJava); 13639 effect(USE meth); 13640 13641 ins_cost(300); 13642 format %{ "MOV EAX,(oop)-1\n\t" 13643 "CALL,dynamic" %} 13644 opcode(0xE8); /* E8 cd */ 13645 ins_encode( pre_call_resets, 13646 Java_Dynamic_Call( meth ), 13647 call_epilog, 13648 post_call_FPU ); 13649 ins_pipe( pipe_slow ); 13650 ins_alignment(4); 13651 %} 13652 13653 // Call Runtime Instruction 13654 instruct CallRuntimeDirect(method meth) %{ 13655 match(CallRuntime ); 13656 effect(USE meth); 13657 13658 ins_cost(300); 13659 format %{ "CALL,runtime " %} 13660 opcode(0xE8); /* E8 cd */ 13661 // Use FFREEs to clear entries in float stack 13662 ins_encode( pre_call_resets, 13663 FFree_Float_Stack_All, 13664 Java_To_Runtime( meth ), 13665 post_call_FPU ); 13666 ins_pipe( pipe_slow ); 13667 %} 13668 13669 // Call runtime without safepoint 13670 instruct CallLeafDirect(method meth) %{ 13671 match(CallLeaf); 13672 effect(USE meth); 13673 13674 ins_cost(300); 13675 format %{ "CALL_LEAF,runtime " %} 13676 opcode(0xE8); /* E8 cd */ 13677 ins_encode( pre_call_resets, 13678 FFree_Float_Stack_All, 13679 Java_To_Runtime( meth ), 13680 Verify_FPU_For_Leaf, post_call_FPU ); 13681 ins_pipe( pipe_slow ); 13682 %} 13683 13684 instruct CallLeafNoFPDirect(method meth) %{ 13685 match(CallLeafNoFP); 13686 effect(USE meth); 13687 13688 ins_cost(300); 13689 format %{ "CALL_LEAF_NOFP,runtime " %} 13690 opcode(0xE8); /* E8 cd */ 13691 ins_encode(pre_call_resets, Java_To_Runtime(meth)); 13692 ins_pipe( pipe_slow ); 13693 %} 13694 13695 13696 // Return Instruction 13697 // Remove the return address & jump to it. 13698 instruct Ret() %{ 13699 match(Return); 13700 format %{ "RET" %} 13701 opcode(0xC3); 13702 ins_encode(OpcP); 13703 ins_pipe( pipe_jmp ); 13704 %} 13705 13706 // Tail Call; Jump from runtime stub to Java code. 13707 // Also known as an 'interprocedural jump'. 13708 // Target of jump will eventually return to caller. 
13709 // TailJump below removes the return address. 13710 // Don't use ebp for 'jump_target' because a MachEpilogNode has already been 13711 // emitted just above the TailCall which has reset ebp to the caller state. 13712 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{ 13713 match(TailCall jump_target method_ptr); 13714 ins_cost(300); 13715 format %{ "JMP $jump_target \t# EBX holds method" %} 13716 opcode(0xFF, 0x4); /* Opcode FF /4 */ 13717 ins_encode( OpcP, RegOpc(jump_target) ); 13718 ins_pipe( pipe_jmp ); 13719 %} 13720 13721 13722 // Tail Jump; remove the return address; jump to target. 13723 // TailCall above leaves the return address around. 13724 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{ 13725 match( TailJump jump_target ex_oop ); 13726 ins_cost(300); 13727 format %{ "POP EDX\t# pop return address into dummy\n\t" 13728 "JMP $jump_target " %} 13729 opcode(0xFF, 0x4); /* Opcode FF /4 */ 13730 ins_encode( enc_pop_rdx, 13731 OpcP, RegOpc(jump_target) ); 13732 ins_pipe( pipe_jmp ); 13733 %} 13734 13735 // Create exception oop: created by stack-crawling runtime code. 13736 // Created exception is now available to this handler, and is setup 13737 // just prior to jumping to this handler. No code emitted. 13738 instruct CreateException( eAXRegP ex_oop ) 13739 %{ 13740 match(Set ex_oop (CreateEx)); 13741 13742 size(0); 13743 // use the following format syntax 13744 format %{ "# exception oop is in EAX; no code emitted" %} 13745 ins_encode(); 13746 ins_pipe( empty ); 13747 %} 13748 13749 13750 // Rethrow exception: 13751 // The exception oop will come in the first argument position. 13752 // Then JUMP (not call) to the rethrow stub code. 
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP    rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// FastLock with RTM (hardware transactional) support enabled.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    // On 32-bit x86 the current thread must be fetched explicitly.
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
                 _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// FastLock, legacy (non-lightweight) locking mode, no RTM.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
  predicate(LockingMode != LM_LIGHTWEIGHT && !Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, $thread$$Register, nullptr, nullptr, nullptr, false, false);
  %}
  ins_pipe(pipe_slow);
%}

// FastUnlock, legacy (non-lightweight) locking mode.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// FastLock for the lightweight locking mode (LM_LIGHTWEIGHT).
instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// FastUnlock for the lightweight locking mode (LM_LIGHTWEIGHT).
instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object eax_reg));
  effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast a long's low bits into an AVX-512 opmask register (len <= 32).
instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// MaskAll for vector lengths > 32; needs a temporary opmask register.
instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// Int-source variant of MaskAll for vector lengths > 32.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Safepoint Instruction
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2);  /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();
    // 0x85 is the TEST r/m32,r32 opcode; verifies the expected 2-byte form.
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV    $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...]
//  );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Fold a reload of a just-spilled value: when the load reads the same slot
// the immediately-preceding store wrote, re-emit only the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.