//
// Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
// For the integer registers below it matches the x86 hardware register
// number (EAX=0, ECX=1, EDX=2, EBX=3, ESP=4, EBP=5, ESI=6, EDI=7).

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP(NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
// FPR0 (TOS) is defined but deliberately excluded from the allocatable FP
// classes further below; it exists only so the encodings can refer to it.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
// Each FPRnL/FPRnH pair covers one 64-bit (double) value: the H half is the
// adjacent VMReg slot (->next()) of the same FloatRegister.
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ?
// any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs.  Each pair is listed low-half first and
// must match the Long pairings documented for alloc_class chunk0 above.
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
reg_class ebpd_reg( EBP,EDI );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
228 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L ); 229 230 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H, 231 FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H, 232 FPR7L,FPR7H ); 233 234 reg_class fp_flt_reg0( FPR1L ); 235 reg_class fp_dbl_reg0( FPR1L,FPR1H ); 236 reg_class fp_dbl_reg1( FPR2L,FPR2H ); 237 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H, 238 FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H ); 239 240 %} 241 242 243 //----------SOURCE BLOCK------------------------------------------------------- 244 // This is a block of C++ code which provides values, functions, and 245 // definitions necessary in the rest of the architecture description 246 source_hpp %{ 247 // Must be visible to the DFA in dfa_x86_32.cpp 248 extern bool is_operand_hi32_zero(Node* n); 249 %} 250 251 source %{ 252 #define RELOC_IMM32 Assembler::imm_operand 253 #define RELOC_DISP32 Assembler::disp32_operand 254 255 #define __ _masm. 256 257 // How to find the high register of a Long pair, given the low register 258 #define HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2)) 259 #define HIGH_FROM_LOW_ENC(x) ((x)+2) 260 261 // These masks are used to provide 128-bit aligned bitmasks to the XMM 262 // instructions, to allow sign-masking or sign-bit flipping. They allow 263 // fast versions of NegF/NegD and AbsF/AbsD. 264 265 void reg_mask_init() {} 266 267 // Note: 'double' and 'long long' have 32-bits alignment on x86. 268 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) { 269 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address 270 // of 128-bits operands for SSE instructions. 271 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF))); 272 // Store the value to a 128-bits operand. 273 operand[0] = lo; 274 operand[1] = hi; 275 return operand; 276 } 277 278 // Buffer for 128-bits masks used by SSE instructions. 
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool entry is a 16-byte-aligned 128-bit constant carved out of
// fp_signmask_pool by double_quadword (hence the extra alignment slot).
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call to reset FPU/AVX state:
// 6 bytes for an fldcw when the method runs in 24-bit FP mode, plus 3 bytes
// for a vzeroupper when the CPU supports it.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}

// Size of the FFree_Float_Stack_All sequence; recorded at emission time,
// -1 until then (asserted below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M (or SIB) style byte built from 2-, 3- and 3-bit fields:
// (f1 << 6) | (f2 << 3) | f3.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte formed by OR-ing a condition code into a base opcode.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Embedded oops must be valid (or 0 / the non-oop sentinel).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + ModR/M + SIB addressing [ESP+disp], choosing an 8-bit or
// 32-bit displacement form depending on the magnitude of disp.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModR/M (+ optional SIB) and displacement bytes for a
// register/memory operand.  index == 0x4 means "no index register";
// base == -1 selects the absolute-address encoding (mod=00, r/m=101).
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) {     // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Emit a 32-bit register-to-register move (MOV r32, r/m32, opcode 0x8B);
// a move to self is elided entirely.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

void
emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize the three-way FP compare result into dst:
// -1 for unordered (parity) or below, 0 for equal, 1 for above.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

// On x86_32 the constant table base needs no code: addressing is absolute.
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog; must mirror the layout produced by
// MachPrologNode::emit below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  // verified_entry emits the actual frame-setup code described by format().
  __ verified_entry(C);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because
    // users might be emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog; must mirror the layout produced by
// MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL rsp, poll_offset[thread] \n\t"
              "JA #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize (imm32 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize (imm8 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // pop EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    // NOTE(review): 'masm' appears unused here -- the '__' macro expands to
    // '_masm.' (declared at the top of this method); confirm before removing.
    MacroAssembler masm(&cbuf);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Only create the stub during the real emission pass, not while
      // measuring size in the scratch buffer.
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//============================================================================= 731 732 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack }; 733 static enum RC rc_class( OptoReg::Name reg ) { 734 735 if( !OptoReg::is_valid(reg) ) return rc_bad; 736 if (OptoReg::is_stack(reg)) return rc_stack; 737 738 VMReg r = OptoReg::as_VMReg(reg); 739 if (r->is_Register()) return rc_int; 740 if (r->is_FloatRegister()) { 741 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 742 return rc_float; 743 } 744 if (r->is_KRegister()) return rc_kreg; 745 assert(r->is_XMMRegister(), "must be"); 746 return rc_xmm; 747 } 748 749 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, 750 int opcode, const char *op_str, int size, outputStream* st ) { 751 if( cbuf ) { 752 emit_opcode (*cbuf, opcode ); 753 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); 754 #ifndef PRODUCT 755 } else if( !do_size ) { 756 if( size != 0 ) st->print("\n\t"); 757 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 758 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 759 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 760 } else { // FLD, FST, PUSH, POP 761 st->print("%s [ESP + #%d]",op_str,offset); 762 } 763 #endif 764 } 765 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 766 return size+3+offset_size; 767 } 768 769 // Helper for XMM registers. Extra opcode bits, limited syntax. 
// Spill/reload between an XMM register and [ESP + offset].  Handles float
// (MOVSS) and adjacent-pair double (MOVSD/MOVLPD) moves, and accounts for
// VEX/EVEX prefix and compressed-displacement sizes in the returned byte count.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Size accounting: EVEX can compress the displacement to one byte in
  // cases where legacy/AVX encoding needs four.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy (float or adjacent-pair double); emits, prints,
// or just sizes the move depending on the mode flags.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// 32-bit GPR -> XMM copy via MOVD.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// XMM -> 32-bit GPR copy via MOVD.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer register-to-register copy (MOV r32, r/m32 = 0x8B + modrm, 2 bytes).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 stack register to [ESP + offset].  If the source is not
// already at top-of-stack it is pushed first (FLD) and stored with a
// popping FSTP; otherwise a non-popping FST is used.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // EBX_num/EDX_num here only select the /r field of the store opcode
  // (store-and-pop vs. store-no-pop) when passed through impl_helper.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
947 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo, 948 int src_hi, int dst_hi, uint ireg, outputStream* st); 949 950 void vec_spill_helper(CodeBuffer *cbuf, bool is_load, 951 int stack_offset, int reg, uint ireg, outputStream* st); 952 953 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, 954 int dst_offset, uint ireg, outputStream* st) { 955 if (cbuf) { 956 MacroAssembler _masm(cbuf); 957 switch (ireg) { 958 case Op_VecS: 959 __ pushl(Address(rsp, src_offset)); 960 __ popl (Address(rsp, dst_offset)); 961 break; 962 case Op_VecD: 963 __ pushl(Address(rsp, src_offset)); 964 __ popl (Address(rsp, dst_offset)); 965 __ pushl(Address(rsp, src_offset+4)); 966 __ popl (Address(rsp, dst_offset+4)); 967 break; 968 case Op_VecX: 969 __ movdqu(Address(rsp, -16), xmm0); 970 __ movdqu(xmm0, Address(rsp, src_offset)); 971 __ movdqu(Address(rsp, dst_offset), xmm0); 972 __ movdqu(xmm0, Address(rsp, -16)); 973 break; 974 case Op_VecY: 975 __ vmovdqu(Address(rsp, -32), xmm0); 976 __ vmovdqu(xmm0, Address(rsp, src_offset)); 977 __ vmovdqu(Address(rsp, dst_offset), xmm0); 978 __ vmovdqu(xmm0, Address(rsp, -32)); 979 break; 980 case Op_VecZ: 981 __ evmovdquq(Address(rsp, -64), xmm0, 2); 982 __ evmovdquq(xmm0, Address(rsp, src_offset), 2); 983 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); 984 __ evmovdquq(xmm0, Address(rsp, -64), 2); 985 break; 986 default: 987 ShouldNotReachHere(); 988 } 989 #ifndef PRODUCT 990 } else { 991 switch (ireg) { 992 case Op_VecS: 993 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 994 "popl [rsp + #%d]", 995 src_offset, dst_offset); 996 break; 997 case Op_VecD: 998 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 999 "popq [rsp + #%d]\n\t" 1000 "pushl [rsp + #%d]\n\t" 1001 "popq [rsp + #%d]", 1002 src_offset, dst_offset, src_offset+4, dst_offset+4); 1003 break; 1004 case Op_VecX: 1005 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1006 "movdqu xmm0, [rsp + #%d]\n\t" 1007 
"movdqu [rsp + #%d], xmm0\n\t" 1008 "movdqu xmm0, [rsp - #16]", 1009 src_offset, dst_offset); 1010 break; 1011 case Op_VecY: 1012 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1013 "vmovdqu xmm0, [rsp + #%d]\n\t" 1014 "vmovdqu [rsp + #%d], xmm0\n\t" 1015 "vmovdqu xmm0, [rsp - #32]", 1016 src_offset, dst_offset); 1017 break; 1018 case Op_VecZ: 1019 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1020 "vmovdqu xmm0, [rsp + #%d]\n\t" 1021 "vmovdqu [rsp + #%d], xmm0\n\t" 1022 "vmovdqu xmm0, [rsp - #64]", 1023 src_offset, dst_offset); 1024 break; 1025 default: 1026 ShouldNotReachHere(); 1027 } 1028 #endif 1029 } 1030 } 1031 1032 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1033 // Get registers to move 1034 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1035 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1036 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1037 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1038 1039 enum RC src_second_rc = rc_class(src_second); 1040 enum RC src_first_rc = rc_class(src_first); 1041 enum RC dst_second_rc = rc_class(dst_second); 1042 enum RC dst_first_rc = rc_class(dst_first); 1043 1044 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1045 1046 // Generate spill code! 
1047 int size = 0; 1048 1049 if( src_first == dst_first && src_second == dst_second ) 1050 return size; // Self copy, no move 1051 1052 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) { 1053 uint ireg = ideal_reg(); 1054 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1055 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1056 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1057 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1058 // mem -> mem 1059 int src_offset = ra_->reg2offset(src_first); 1060 int dst_offset = ra_->reg2offset(dst_first); 1061 vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); 1062 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1063 vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st); 1064 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1065 int stack_offset = ra_->reg2offset(dst_first); 1066 vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st); 1067 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1068 int stack_offset = ra_->reg2offset(src_first); 1069 vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st); 1070 } else { 1071 ShouldNotReachHere(); 1072 } 1073 return 0; 1074 } 1075 1076 // -------------------------------------- 1077 // Check for mem-mem move. push/pop to move. 
1078 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1079 if( src_second == dst_first ) { // overlapping stack copy ranges 1080 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1081 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1082 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1083 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1084 } 1085 // move low bits 1086 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1087 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1088 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1089 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1090 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1091 } 1092 return size; 1093 } 1094 1095 // -------------------------------------- 1096 // Check for integer reg-reg copy 1097 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1098 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1099 1100 // Check for integer store 1101 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1102 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1103 1104 // Check for integer load 1105 if( src_first_rc == rc_stack && dst_first_rc == rc_int ) 1106 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1107 1108 // Check for integer reg-xmm reg copy 1109 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1110 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1111 "no 64 bit integer-float reg moves" ); 1112 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1113 } 1114 // -------------------------------------- 1115 // Check for float reg-reg copy 1116 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1117 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1118 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1119 if( cbuf ) { 1120 1121 // Note the mucking with the register encode to compensate for the 0/1 1122 // indexing issue mentioned in a comment in the reg_def sections 1123 // for FPR registers many lines above here. 1124 1125 if( src_first != FPR1L_num ) { 1126 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1127 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1128 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1129 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1130 } else { 1131 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1132 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1133 } 1134 #ifndef PRODUCT 1135 } else if( !do_size ) { 1136 if( size != 0 ) st->print("\n\t"); 1137 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1138 else st->print( "FST %s", Matcher::regName[dst_first]); 1139 #endif 1140 } 1141 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1142 } 1143 1144 // Check for float store 1145 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1146 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1147 } 1148 1149 // Check for float load 1150 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1151 int offset = ra_->reg2offset(src_first); 1152 const char *op_str; 1153 int op; 1154 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1155 op_str = "FLD_D"; 1156 op = 0xDD; 1157 } else { // 32-bit load 1158 op_str = "FLD_S"; 1159 op = 0xD9; 1160 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1161 } 1162 if( cbuf ) { 1163 emit_opcode (*cbuf, op ); 1164 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1165 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1166 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1167 #ifndef PRODUCT 1168 } else if( !do_size ) { 1169 if( size != 0 ) st->print("\n\t"); 1170 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1171 #endif 1172 } 1173 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1174 return size + 3+offset_size+2; 1175 } 1176 1177 // Check for xmm reg-reg copy 1178 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1179 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1180 (src_first+1 == src_second && dst_first+1 == dst_second), 1181 "no non-adjacent float-moves" ); 1182 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1183 } 1184 1185 // Check for xmm reg-integer reg copy 1186 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1187 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1188 "no 64 bit float-integer reg moves" ); 1189 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1190 } 1191 1192 // Check for xmm store 1193 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1194 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st); 1195 } 1196 1197 // Check for float xmm load 1198 if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1199 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1200 } 1201 1202 // Copy from float reg to xmm reg 1203 if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) { 1204 
// copy to the top of stack from floating point reg 1205 // and use LEA to preserve flags 1206 if( cbuf ) { 1207 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1208 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1209 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1210 emit_d8(*cbuf,0xF8); 1211 #ifndef PRODUCT 1212 } else if( !do_size ) { 1213 if( size != 0 ) st->print("\n\t"); 1214 st->print("LEA ESP,[ESP-8]"); 1215 #endif 1216 } 1217 size += 4; 1218 1219 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1220 1221 // Copy from the temp memory to the xmm reg. 1222 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1223 1224 if( cbuf ) { 1225 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1226 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1227 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1228 emit_d8(*cbuf,0x08); 1229 #ifndef PRODUCT 1230 } else if( !do_size ) { 1231 if( size != 0 ) st->print("\n\t"); 1232 st->print("LEA ESP,[ESP+8]"); 1233 #endif 1234 } 1235 size += 4; 1236 return size; 1237 } 1238 1239 // AVX-512 opmask specific spilling. 
1240 if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) { 1241 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1242 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1243 int offset = ra_->reg2offset(src_first); 1244 if (cbuf != nullptr) { 1245 MacroAssembler _masm(cbuf); 1246 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); 1247 #ifndef PRODUCT 1248 } else { 1249 st->print("KMOV %s, [ESP + %d]", Matcher::regName[dst_first], offset); 1250 #endif 1251 } 1252 return 0; 1253 } 1254 1255 if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) { 1256 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1257 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1258 int offset = ra_->reg2offset(dst_first); 1259 if (cbuf != nullptr) { 1260 MacroAssembler _masm(cbuf); 1261 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first])); 1262 #ifndef PRODUCT 1263 } else { 1264 st->print("KMOV [ESP + %d], %s", offset, Matcher::regName[src_first]); 1265 #endif 1266 } 1267 return 0; 1268 } 1269 1270 if (src_first_rc == rc_kreg && dst_first_rc == rc_int) { 1271 Unimplemented(); 1272 return 0; 1273 } 1274 1275 if (src_first_rc == rc_int && dst_first_rc == rc_kreg) { 1276 Unimplemented(); 1277 return 0; 1278 } 1279 1280 if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) { 1281 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1282 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1283 if (cbuf != nullptr) { 1284 MacroAssembler _masm(cbuf); 1285 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first])); 1286 #ifndef PRODUCT 1287 } else { 1288 st->print("KMOV %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]); 1289 #endif 1290 } 1291 return 0; 1292 } 1293 1294 
assert( size > 0, "missed a case" ); 1295 1296 // -------------------------------------------------------------------- 1297 // Check for second bits still needing moving. 1298 if( src_second == dst_second ) 1299 return size; // Self copy; no move 1300 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1301 1302 // Check for second word int-int move 1303 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1304 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1305 1306 // Check for second word integer store 1307 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1308 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1309 1310 // Check for second word integer load 1311 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1312 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1313 1314 Unimplemented(); 1315 return 0; // Mute compiler 1316 } 1317 1318 #ifndef PRODUCT 1319 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1320 implementation( nullptr, ra_, false, st ); 1321 } 1322 #endif 1323 1324 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1325 implementation( &cbuf, ra_, false, nullptr ); 1326 } 1327 1328 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1329 return MachNode::size(ra_); 1330 } 1331 1332 1333 //============================================================================= 1334 #ifndef PRODUCT 1335 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1336 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1337 int reg = ra_->get_reg_first(this); 1338 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1339 } 1340 #endif 1341 1342 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1343 int offset = 
ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// Size must match emit() exactly: 7 bytes with a disp32, 4 with a disp8.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the unverified entry point (inline cache check).
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Unverified entry point: compare the expected klass (EAX) against the
// receiver's klass (loaded from ECX) and jump to the IC-miss stub on
// mismatch, then pad with NOPs so the verified entry point is patchable.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

// Vector calling convention not supported.
bool Matcher::supports_vector_calling_convention() {
  return false;
}

OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  Unimplemented();
  return OptoRegPair(0, 0);
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
// this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On 86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

// Register-pressure limit for integer registers (default 6 on x86_32
// unless overridden with -XX:INTPRESSURE).
uint Matcher::int_pressure_limit()
{
  return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
}

// Register-pressure limit for float registers (default 6 unless overridden
// with -XX:FLOATPRESSURE).
uint Matcher::float_pressure_limit()
{
  return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// Recognizes an AND with a constant whose high word is zero, or a long
// constant with a zero high word.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.
// Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction. Operands specify their base encoding interface with the
// interface keyword. There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
// operand to generate a function which returns its register number when
// queried. CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried. MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried. COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding. They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode. Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block. Emit functions will live in the
  // main source block for now. In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size prefix.
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // Emit a mod/rm byte for a register-register form.
  enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Emit an explicit opcode followed by a register-register mod/rm byte.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32, 0 (does not use XOR, so flags are preserved).
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( cbuf, 0x0 ); // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    // normal case special case
    //
    // input : rax,: dividend min_int
    // reg: divisor -1
    //
    // output: rax,: quotient (= rax, idiv reg) min_int
    // rdx: remainder (= rax, irem reg) 0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80 cmp rax,80000000h
    // 0F 85 0B 00 00 00 jne normal_case
    // 33 D2 xor rdx,edx
    // 83 F9 FF cmp rcx,0FFh
    // 0F 84 03 00 00 00 je done
    // normal_case:
    // 99 cdq
    // F7 F9 idiv rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
    // normal_case:
    emit_opcode(cbuf,0x99); // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1633 // Check for 8-bit immediate, and set sign extend bit in opcode 1634 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1635 emit_opcode(cbuf, $primary | 0x02); } 1636 else { // If 32-bit immediate 1637 emit_opcode(cbuf, $primary); 1638 } 1639 // Emit r/m byte with secondary opcode, after primary opcode. 1640 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1641 %} 1642 1643 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1644 // Check for 8-bit immediate, and set sign extend bit in opcode 1645 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1646 $$$emit8$imm$$constant; 1647 } 1648 else { // If 32-bit immediate 1649 // Output immediate 1650 $$$emit32$imm$$constant; 1651 } 1652 %} 1653 1654 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1655 // Emit primary opcode and set sign-extend bit 1656 // Check for 8-bit immediate, and set sign extend bit in opcode 1657 int con = (int)$imm$$constant; // Throw away top bits 1658 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1659 // Emit r/m byte with secondary opcode, after primary opcode. 1660 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1661 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1662 else emit_d32(cbuf,con); 1663 %} 1664 1665 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1666 // Emit primary opcode and set sign-extend bit 1667 // Check for 8-bit immediate, and set sign extend bit in opcode 1668 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1669 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1670 // Emit r/m byte with tertiary opcode, after primary opcode. 
1671 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg)); 1672 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1673 else emit_d32(cbuf,con); 1674 %} 1675 1676 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1677 emit_cc(cbuf, $secondary, $dst$$reg ); 1678 %} 1679 1680 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1681 int destlo = $dst$$reg; 1682 int desthi = HIGH_FROM_LOW_ENC(destlo); 1683 // bswap lo 1684 emit_opcode(cbuf, 0x0F); 1685 emit_cc(cbuf, 0xC8, destlo); 1686 // bswap hi 1687 emit_opcode(cbuf, 0x0F); 1688 emit_cc(cbuf, 0xC8, desthi); 1689 // xchg lo and hi 1690 emit_opcode(cbuf, 0x87); 1691 emit_rm(cbuf, 0x3, destlo, desthi); 1692 %} 1693 1694 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1695 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1696 %} 1697 1698 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1699 $$$emit8$primary; 1700 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1701 %} 1702 1703 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1704 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1705 emit_d8(cbuf, op >> 8 ); 1706 emit_d8(cbuf, op & 255); 1707 %} 1708 1709 // emulate a CMOV with a conditional branch around a MOV 1710 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1711 // Invert sense of branch from sense of CMOV 1712 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1713 emit_d8( cbuf, $brOffs$$constant ); 1714 %} 1715 1716 enc_class enc_PartialSubtypeCheck( ) %{ 1717 Register Redi = as_Register(EDI_enc); // result register 1718 Register Reax = as_Register(EAX_enc); // super class 1719 Register Recx = as_Register(ECX_enc); // killed 1720 Register Resi = as_Register(ESI_enc); // sub class 1721 Label miss; 1722 1723 MacroAssembler _masm(&cbuf); 1724 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1725 nullptr, &miss, 1726 /*set_cond_codes:*/ true); 1727 if ($primary) { 1728 __ xorptr(Redi, Redi); 1729 } 1730 __ bind(miss); 1731 %} 1732 1733 enc_class FFree_Float_Stack_All %{ // 
Free_Float_Stack_All 1734 MacroAssembler masm(&cbuf); 1735 int start = masm.offset(); 1736 if (UseSSE >= 2) { 1737 if (VerifyFPU) { 1738 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1739 } 1740 } else { 1741 // External c_calling_convention expects the FPU stack to be 'clean'. 1742 // Compiled code leaves it dirty. Do cleanup now. 1743 masm.empty_FPU_stack(); 1744 } 1745 if (sizeof_FFree_Float_Stack_All == -1) { 1746 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1747 } else { 1748 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1749 } 1750 %} 1751 1752 enc_class Verify_FPU_For_Leaf %{ 1753 if( VerifyFPU ) { 1754 MacroAssembler masm(&cbuf); 1755 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1756 } 1757 %} 1758 1759 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1760 // This is the instruction starting address for relocation info. 1761 MacroAssembler _masm(&cbuf); 1762 cbuf.set_insts_mark(); 1763 $$$emit8$primary; 1764 // CALL directly to the runtime 1765 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1766 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1767 __ post_call_nop(); 1768 1769 if (UseSSE >= 2) { 1770 MacroAssembler _masm(&cbuf); 1771 BasicType rt = tf()->return_type(); 1772 1773 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1774 // A C runtime call where the return value is unused. In SSE2+ 1775 // mode the result needs to be removed from the FPU stack. It's 1776 // likely that this function call could be removed by the 1777 // optimizer if the C function is a pure function. 
1778 __ ffree(0); 1779 } else if (rt == T_FLOAT) { 1780 __ lea(rsp, Address(rsp, -4)); 1781 __ fstp_s(Address(rsp, 0)); 1782 __ movflt(xmm0, Address(rsp, 0)); 1783 __ lea(rsp, Address(rsp, 4)); 1784 } else if (rt == T_DOUBLE) { 1785 __ lea(rsp, Address(rsp, -8)); 1786 __ fstp_d(Address(rsp, 0)); 1787 __ movdbl(xmm0, Address(rsp, 0)); 1788 __ lea(rsp, Address(rsp, 8)); 1789 } 1790 } 1791 %} 1792 1793 enc_class pre_call_resets %{ 1794 // If method sets FPU control word restore it here 1795 debug_only(int off0 = cbuf.insts_size()); 1796 if (ra_->C->in_24_bit_fp_mode()) { 1797 MacroAssembler _masm(&cbuf); 1798 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 1799 } 1800 // Clear upper bits of YMM registers when current compiled code uses 1801 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1802 MacroAssembler _masm(&cbuf); 1803 __ vzeroupper(); 1804 debug_only(int off1 = cbuf.insts_size()); 1805 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1806 %} 1807 1808 enc_class post_call_FPU %{ 1809 // If method sets FPU control word do it here also 1810 if (Compile::current()->in_24_bit_fp_mode()) { 1811 MacroAssembler masm(&cbuf); 1812 masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 1813 } 1814 %} 1815 1816 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1817 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1818 // who we intended to call. 1819 MacroAssembler _masm(&cbuf); 1820 cbuf.set_insts_mark(); 1821 $$$emit8$primary; 1822 1823 if (!_method) { 1824 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1825 runtime_call_Relocation::spec(), 1826 RELOC_IMM32); 1827 __ post_call_nop(); 1828 } else { 1829 int method_index = resolved_method_index(cbuf); 1830 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1831 : static_call_Relocation::spec(method_index); 1832 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1833 rspec, RELOC_DISP32); 1834 __ post_call_nop(); 1835 address mark = cbuf.insts_mark(); 1836 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) { 1837 // Calls of the same statically bound method can share 1838 // a stub to the interpreter. 1839 cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off()); 1840 } else { 1841 // Emit stubs for static call. 1842 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark); 1843 if (stub == nullptr) { 1844 ciEnv::current()->record_failure("CodeCache is full"); 1845 return; 1846 } 1847 } 1848 } 1849 %} 1850 1851 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1852 MacroAssembler _masm(&cbuf); 1853 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1854 __ post_call_nop(); 1855 %} 1856 1857 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1858 int disp = in_bytes(Method::from_compiled_offset()); 1859 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1860 1861 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1862 MacroAssembler _masm(&cbuf); 1863 cbuf.set_insts_mark(); 1864 $$$emit8$primary; 1865 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1866 emit_d8(cbuf, disp); // Displacement 1867 __ post_call_nop(); 1868 %} 1869 1870 // Following encoding is no longer used, but may be restored if calling 1871 // convention changes significantly. 
  // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  // enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
  //   // int ic_reg     = Matcher::inline_cache_reg();
  //   // int ic_encode  = Matcher::_regEncode[ic_reg];
  //   // int imo_reg    = Matcher::interpreter_method_reg();
  //   // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //   // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
  //   // // so we load it immediately before the call
  //   // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
  //   // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
  //
  //   // xor rbp,ebp
  //   emit_opcode(cbuf, 0x33);
  //   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //   // CALL to interpreter.
  //   cbuf.set_insts_mark();
  //   $$$emit8$primary;
  //   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
  //               runtime_call_Relocation::spec(), RELOC_IMM32 );
  // %}

  // Shift by an 8-bit immediate: opcode, ModR/M (/digit in secondary), imm8
  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load the low 32 bits of a long constant; zero is emitted as XOR dst,dst
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load the high 32 bits of a long constant into the paired register
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Encode a reg-reg copy. If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Copy low word of a long into an int register (same helper as enc_Copy)
  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Low-half long op: primary opcode then reg-reg ModR/M
  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // High-half long op: secondary opcode then ModR/M of the high registers
  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
  %}

  // Low-half ModR/M only (opcode already emitted by the instruction)
  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // High-half ModR/M only (opcode already emitted by the instruction)
  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
  %}

  // ModR/M pairing an int register with the high half of a long source
  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  // Emit a 32-bit immediate directly (no relocation)
  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  enc_class lock_prefix( ) %{
    emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  // 64-bit LOCK CMPXCHG8B at [$mem_ptr]; EBX/ECX are exchanged before and
  // after because the instruction expects the new-value high word in ECX
  // while this port's register encoding supplies it in EBX.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  // 32-bit LOCK CMPXCHG at [$mem_ptr]
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // 8-bit LOCK CMPXCHG at [$mem_ptr]
  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHGB [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB0);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // 16-bit LOCK CMPXCHG at [$mem_ptr] (0x66 operand-size prefix)
  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // 16-bit mode
    emit_opcode(cbuf, 0x66);

    // CMPXCHGW [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize a boolean from the flags: MOV res,0; JNE,s +5; MOV res,1 —
  // res ends up 1 when the branch falls through (ZF set), 0 otherwise.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Long shift by 1..31: SHLD/SHRD (tertiary opcode, 0xA4 = SHLD selects
  // operand order) followed by the plain shift of the other half.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic long shift right by 32..63: move hi->lo, shift lo (if cnt>32),
  // then sign-fill the high half with SAR hi,31.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical long shift by 32..63: move one half across, shift it by cnt-32,
  // then clear the vacated half with XOR r2,r2.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  // ADLC-time /digit variant of RegMem; asserts no oop relocation is present.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  // ADLC-time /digit variant of RegMem that preserves any displacement reloc.
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // dst = min(dst,src): compare, branch over the MOV when dst < src
  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // dst = max(dst,src): compare, branch over the MOV when dst > src
  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // Two's-complement negate: F7 /3
  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  // SETL (set byte on less): 0F 9C
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free conditional add: p += (p < q) ? y : 0, built from SUB/SBB/AND/ADD
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // 64-bit variable shift left: if shift >= 32 move lo->hi and clear lo,
  // then SHLD/SHL by the (mod-32) count in CL.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    // SHL $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // 64-bit variable logical shift right: mirror of shift_left_long using SHRD/SHR.
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
  %}

  // 64-bit variable arithmetic shift right: high half is sign-filled
  // (SAR hi,31) instead of cleared when shift >= 32.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // Multiply ST(dst) by the subnormal bias-1 80-bit constant (strictfp)
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Multiply ST(dst) by the subnormal bias-2 80-bit constant (strictfp)
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
2396 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2397 // Opcode already emitted 2398 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2399 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2400 emit_d32(cbuf, $dst$$disp); // Displacement 2401 %} 2402 2403 // Push the integer in stackSlot 'src' onto FP-stack 2404 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2405 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2406 %} 2407 2408 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2409 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2410 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2411 %} 2412 2413 // Same as Pop_Mem_F except for opcode 2414 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2415 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2416 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2417 %} 2418 2419 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2420 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2421 emit_d8( cbuf, 0xD8+$dst$$reg ); 2422 %} 2423 2424 enc_class Push_Reg_FPR( regFPR dst ) %{ 2425 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2426 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2427 %} 2428 2429 // Push FPU's float to a stack-slot, and pop FPU-stack 2430 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2431 int pop = 0x02; 2432 if ($src$$reg != FPR1L_enc) { 2433 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2434 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2435 pop = 0x03; 2436 } 2437 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2438 %} 2439 2440 // Push FPU's double to a stack-slot, and pop FPU-stack 2441 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2442 int pop = 0x02; 2443 if ($src$$reg != FPR1L_enc) { 2444 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2445 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2446 pop = 0x03; 2447 } 2448 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2449 %} 2450 2451 // Push FPU's double to 
a FPU-stack-slot, and pop FPU-stack 2452 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 2453 int pop = 0xD0 - 1; // -1 since we skip FLD 2454 if ($src$$reg != FPR1L_enc) { 2455 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 2456 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2457 pop = 0xD8; 2458 } 2459 emit_opcode( cbuf, 0xDD ); 2460 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) 2461 %} 2462 2463 2464 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 2465 // load dst in FPR0 2466 emit_opcode( cbuf, 0xD9 ); 2467 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2468 if ($src$$reg != FPR1L_enc) { 2469 // fincstp 2470 emit_opcode (cbuf, 0xD9); 2471 emit_opcode (cbuf, 0xF7); 2472 // swap src with FPR1: 2473 // FXCH FPR1 with src 2474 emit_opcode(cbuf, 0xD9); 2475 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2476 // fdecstp 2477 emit_opcode (cbuf, 0xD9); 2478 emit_opcode (cbuf, 0xF6); 2479 } 2480 %} 2481 2482 enc_class Push_ModD_encoding(regD src0, regD src1) %{ 2483 MacroAssembler _masm(&cbuf); 2484 __ subptr(rsp, 8); 2485 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2486 __ fld_d(Address(rsp, 0)); 2487 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2488 __ fld_d(Address(rsp, 0)); 2489 %} 2490 2491 enc_class Push_ModF_encoding(regF src0, regF src1) %{ 2492 MacroAssembler _masm(&cbuf); 2493 __ subptr(rsp, 4); 2494 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2495 __ fld_s(Address(rsp, 0)); 2496 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2497 __ fld_s(Address(rsp, 0)); 2498 %} 2499 2500 enc_class Push_ResultD(regD dst) %{ 2501 MacroAssembler _masm(&cbuf); 2502 __ fstp_d(Address(rsp, 0)); 2503 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2504 __ addptr(rsp, 8); 2505 %} 2506 2507 enc_class Push_ResultF(regF dst, immI d8) %{ 2508 MacroAssembler _masm(&cbuf); 2509 __ fstp_s(Address(rsp, 0)); 2510 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2511 __ addptr(rsp, $d8$$constant); 2512 %} 2513 2514 enc_class Push_SrcD(regD src) %{ 2515 MacroAssembler _masm(&cbuf); 2516 __ subptr(rsp, 8); 
__ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Reserve an 8-byte scratch slot on the CPU stack (paired with
  // pop_stack_temp_qword below).
  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  // Release the 8-byte scratch slot reserved by push_stack_temp_qword.
  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Copy an XMM double through the pre-reserved stack slot onto the x87
  // stack top.  Assumes push_stack_temp_qword already made the slot.
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Rotate src into st(1) if it is not already there; the actual store of
  // the result is done by a following Pop_Reg_F / Pop_Mem_F encoding.
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Copy x87 status to EFLAGS and jump forward 5 bytes if the parity flag
  // (unordered compare) is clear.
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  // FPREM loop: fprem only reduces partially, so iterate until the C2
  // status bit (reflected through SAHF's parity position) clears.
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop  (rel32 = -12, back to the fprem)
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  // Materialize the x87 condition codes into EFLAGS; an unordered result
  // (C2 set) is forced to look like the LT case by setting the carry flag.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf,
0x66 ); // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16  ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32  ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8  ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8  ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  // Post-compare fixup on P6: if the comparison involved a NaN the parity
  // flag is set; rewrite the flags so NaN compares as "less than".
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8  ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8  ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

  // Pseudo-code for CmpF_Result below:
  // fnstsw_ax();
  // sahf();
  // movl(dst, nan_result);
  // jcc(Assembler::parity, exit);
  // movl(dst, less_result);
  // jcc(Assembler::below, exit);
  // movl(dst, equal_result);
  // jcc(Assembler::equal, exit);
  // movl(dst, greater_result);

  // Result constants (matching the immediates emitted below):
  // less_result    = -1;
  // greater_result =  1;
  // equal_result   =  0;
  // nan_result     = -1;

  // Turn the x87 compare flags into a three-way integer result in dst.
  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8  ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
emit_d8  ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8  ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}


  // Compare the longs and set flags
  // BROKEN! Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}

  // Sign-extend a 32-bit int into a long register pair: copy into lo and
  // hi, then arithmetic-shift the hi word right by 31.
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding  , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}

  // Push the long onto the CPU stack, FILD it as a 64-bit integer onto
  // the x87 stack, then release the 8 bytes.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}

  // Widening multiply into EDX:EAX, then shift the high word right by
  // (cnt-32) to extract the desired bits of the 64-bit product.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL   EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5,
$src1$$reg );
    // SAR    EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {     // shift of 0 needs no instruction
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}

  // Signed 32x32->64 multiply: IMUL leaves the product in EDX:EAX.
  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  // Unsigned 32x32->64 multiply: MUL leaves the product in EDX:EAX.
  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}

  // Full 64x64 multiply (low 64 bits of the product) using schoolbook
  // decomposition; dst is EDX:EAX, tmp accumulates the cross terms.
  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // MUL    EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4,
$src$$reg );
    // ADD    EDX,ESI   (fold the cross terms held in tmp into the hi word)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
  %}

  // 64-bit multiply by a small (8-bit) constant; dst is EDX:EAX.
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}

  // 64-bit divide: push both long operands and call SharedRuntime::ldiv,
  // then pop the four argument words.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);      // 4 words of arguments
  %}

  // 64-bit remainder: same shape as long_div but calls SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the
runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);      // 4 words of arguments
  %}

  // Long test-against-zero: OR the two halves together into tmp so the
  // Z flag reflects whether the whole 64-bit value is zero.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
  %}

  // Long equality compare: compare lo words, and only if they are equal
  // fall through to compare the hi words (sets only EQ/NE usefully).
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
  %}

  // Full signed long compare via CMP lo / SBB hi: tmp is clobbered and
  // the resulting flags order the full 64-bit values.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
    // SBB    $tmp,$src2.hi\t!
Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
  %}

  // Long compare against zero: 0 - src via CMP/SBB, clobbering tmp.
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
  %}

  // Sniff, sniff... smells like Gnu Superoptimizer
  // Two's-complement negate of a long register pair:
  // NEG hi; NEG lo; SBB hi,0 propagates the borrow from the low word.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  // POP EDX (single-byte encoding).
  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);
  %}

  // Tail-jump to the rethrow stub.
  enc_class enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // thrown an exception (but normal values get converted at full speed).
// However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack.  0x80000000 is the x87
    // "integer indefinite" value stored for NaN/out-of-range inputs, so
    // only that value takes the slow path.
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Carry on here...
%}

  // Convert a double to a long.  Same structure as DPR2I_encoding above:
  // truncate-mode FISTP of a 64-bit integer, with a slow-path call to
  // d2l_wrapper when the hardware stored the "integer indefinite" value
  // (EDX==0x80000000 and EAX==0).
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);      // POP EAX
    emit_opcode(cbuf,0x5A);      // POP EDX
    emit_opcode(cbuf,0x81);      // CMP EDX,imm
    emit_d8    (cbuf,0xFA);      // rdx
    emit_d32   (cbuf,0x80000000);//         0x80000000
    emit_opcode(cbuf,0x75);      // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);    // Size of slow_call
    emit_opcode(cbuf,0x85);      // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);      // 2/rax,/rax,
    emit_opcode(cbuf,0x75);      // JNE around_slow_call
    emit_d8    (cbuf,0x07);      // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );     // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);      // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Carry on here...
%}

  // x87 multiply: st(0) *= st(src1).
  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  // x87 add: st(0) += st(src2).
  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  // x87 add-and-pop: st(src2) += st(0), then pop.
  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  // Fused subtract-then-divide on the x87 stack top.
  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  // Fused add-then-multiply on the x87 stack top.
  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  // As MulFAddF, but the multiply pops and targets st(src2).
  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  // (64-bit FILD from memory, then store to the destination stack slot;
  // the x87 load/store pair gives a single atomic 64-bit memory read).
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc =
$mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                                   | (to get allocators register number
//  G  Owned by    |        |        v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//        |        |        |  3
//        |        +--------+
//        V        | old out|      Empty on Intel, window on Sparc
//        |  old   |preserve|      Must be even aligned.
//        |  SP-+--------+----> Matcher::_old_SP, even aligned
//        |     |   in   |  3   area for Intel ret address
//     Owned by |preserve|      Empty on Sparc.
//     SELF    +--------+
//        |     |  pad2  |  2   pad to align old SP
//        |     +--------+  1
//        |     | locks  |  0
//        |     +--------+----> OptoReg::stack0(), even aligned
//        |     |  pad1  | 11   pad to align new SP
//        |     +--------+
//        |     |        | 10
//        |     | spills |  9   spills
//        V     |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^     |  out   |  7
//        |     |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by +--------+
//      CALLEE  | new out|  6   Empty on Intel, window on Sparc
//        | new |preserve|      Must be even-aligned.
//        |  SP-+--------+----> Matcher::_new_SP, even aligned
//        |     |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
inline_cache_reg(EAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
// Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of C & interpreter return values
  // (tables indexed by ideal register type; longs return in EDX:EAX,
  // x87 results in FPR1).
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // (Java calling convention: floats are in XMM0 already at UseSSE>=1,
  // unlike the C convention above which needs UseSSE>=2.)
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib
ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Signed 8-bit immediate (fits the short imm8 instruction forms)
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned 8-bit immediate
operand immU8() %{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Signed 16-bit immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
interface(CONST_INTER);
%}

// nullptr Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus-one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: fits in a signed 32-bit value
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

op_cost(5); 3484 format %{ %} 3485 interface(CONST_INTER); 3486 %} 3487 3488 // Double Immediate 3489 operand immDPR() %{ 3490 predicate(UseSSE<=1); 3491 match(ConD); 3492 3493 op_cost(5); 3494 format %{ %} 3495 interface(CONST_INTER); 3496 %} 3497 3498 operand immD() %{ 3499 predicate(UseSSE>=2); 3500 match(ConD); 3501 3502 op_cost(5); 3503 format %{ %} 3504 interface(CONST_INTER); 3505 %} 3506 3507 // Double Immediate zero 3508 operand immD0() %{ 3509 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3510 // bug that generates code such that NaNs compare equal to 0.0 AND do not 3511 // compare equal to -0.0. 3512 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); 3513 match(ConD); 3514 3515 format %{ %} 3516 interface(CONST_INTER); 3517 %} 3518 3519 // Float Immediate zero 3520 operand immFPR0() %{ 3521 predicate(UseSSE == 0 && n->getf() == 0.0F); 3522 match(ConF); 3523 3524 op_cost(5); 3525 format %{ %} 3526 interface(CONST_INTER); 3527 %} 3528 3529 // Float Immediate one 3530 operand immFPR1() %{ 3531 predicate(UseSSE == 0 && n->getf() == 1.0F); 3532 match(ConF); 3533 3534 op_cost(5); 3535 format %{ %} 3536 interface(CONST_INTER); 3537 %} 3538 3539 // Float Immediate 3540 operand immFPR() %{ 3541 predicate( UseSSE == 0 ); 3542 match(ConF); 3543 3544 op_cost(5); 3545 format %{ %} 3546 interface(CONST_INTER); 3547 %} 3548 3549 // Float Immediate 3550 operand immF() %{ 3551 predicate(UseSSE >= 1); 3552 match(ConF); 3553 3554 op_cost(5); 3555 format %{ %} 3556 interface(CONST_INTER); 3557 %} 3558 3559 // Float Immediate zero. 
// Zero and not -0.0 (jint_cast of +0.0f is 0; the bit pattern of -0.0f is not)
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Vector-mask (opmask) register operand: any register of the class
operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Fixed opmask registers K1..K7 (single-register classes)
operand kReg_K1()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K1));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K2()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K2));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K3()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K3));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K4()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K4));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K5()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K5));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K6()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K6));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K7()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K7));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register class excluding EAX (matches ECX/EDX/ESI/EDI subsets)
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register class excluding EAX and EDX (matches EBX/ECX/ESI/EDI subsets)
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register class excluding ECX (matches EAX/EDX/ESI/EDI subsets)
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand rRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
// Pointer register class excluding EBP (see windows95 note above);
// op_cost discourages its use when a plain eRegP would do.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register class excluding EAX
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register class excluding EAX and EBX
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Long value held in a register pair
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

operand eBDPRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebpd_reg));
  match(reg);

  format %{ "EBP:EDI" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand rFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// NOTE(review): predicate(false) means this operand is never chosen by
// the matcher directly — presumably it is referenced explicitly from
// instruct definitions; confirm against its uses elsewhere in this file.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands (x87 stack registers, pre-SSE2 doubles)
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands (x87 stack registers, pre-SSE floats)
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

operand legRegF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand vlRegF() %{
   constraint(ALLOC_IN_RC(float_reg_vl));
   match(RegF);

   format %{ %}
   interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand legRegD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

operand vlRegD() %{
   constraint(ALLOC_IN_RC(double_reg_vl));
   match(RegD);

   format %{ %}
   interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" with out a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.
// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{  %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
// (conditions are deliberately swapped relative to cmpOp: the encoding for
//  "less" is the jcc code for "g", etc., for use when operands are commuted)
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares
// (same commuted scheme as cmpOp_commute, with unsigned jcc encodings)
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
4568 pipeline %{ 4569 4570 //----------ATTRIBUTES--------------------------------------------------------- 4571 attributes %{ 4572 variable_size_instructions; // Fixed size instructions 4573 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle 4574 instruction_unit_size = 1; // An instruction is 1 bytes long 4575 instruction_fetch_unit_size = 16; // The processor fetches one line 4576 instruction_fetch_units = 1; // of 16 bytes 4577 4578 // List of nop instructions 4579 nops( MachNop ); 4580 %} 4581 4582 //----------RESOURCES---------------------------------------------------------- 4583 // Resources are the functional units available to the machine 4584 4585 // Generic P2/P3 pipeline 4586 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of 4587 // 3 instructions decoded per cycle. 4588 // 2 load/store ops per cycle, 1 branch, 1 FPU, 4589 // 2 ALU op, only ALU0 handles mul/div instructions. 4590 resources( D0, D1, D2, DECODE = D0 | D1 | D2, 4591 MS0, MS1, MEM = MS0 | MS1, 4592 BR, FPU, 4593 ALU0, ALU1, ALU = ALU0 | ALU1 ); 4594 4595 //----------PIPELINE DESCRIPTION----------------------------------------------- 4596 // Pipeline Description specifies the stages in the machine's pipeline 4597 4598 // Generic P2/P3 pipeline 4599 pipe_desc(S0, S1, S2, S3, S4, S5); 4600 4601 //----------PIPELINE CLASSES--------------------------------------------------- 4602 // Pipeline Classes describe the stages in which input and output are 4603 // referenced by the hardware pipeline. 4604 4605 // Naming convention: ialu or fpu 4606 // Then: _reg 4607 // Then: _reg if there is a 2nd register 4608 // Then: _long if it's a pair of instructions implementing a long 4609 // Then: _fat if it requires the big decoder 4610 // Or: _mem if it requires the big decoder and a memory unit. 
4611 4612 // Integer ALU reg operation 4613 pipe_class ialu_reg(rRegI dst) %{ 4614 single_instruction; 4615 dst : S4(write); 4616 dst : S3(read); 4617 DECODE : S0; // any decoder 4618 ALU : S3; // any alu 4619 %} 4620 4621 // Long ALU reg operation 4622 pipe_class ialu_reg_long(eRegL dst) %{ 4623 instruction_count(2); 4624 dst : S4(write); 4625 dst : S3(read); 4626 DECODE : S0(2); // any 2 decoders 4627 ALU : S3(2); // both alus 4628 %} 4629 4630 // Integer ALU reg operation using big decoder 4631 pipe_class ialu_reg_fat(rRegI dst) %{ 4632 single_instruction; 4633 dst : S4(write); 4634 dst : S3(read); 4635 D0 : S0; // big decoder only 4636 ALU : S3; // any alu 4637 %} 4638 4639 // Long ALU reg operation using big decoder 4640 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4641 instruction_count(2); 4642 dst : S4(write); 4643 dst : S3(read); 4644 D0 : S0(2); // big decoder only; twice 4645 ALU : S3(2); // any 2 alus 4646 %} 4647 4648 // Integer ALU reg-reg operation 4649 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4650 single_instruction; 4651 dst : S4(write); 4652 src : S3(read); 4653 DECODE : S0; // any decoder 4654 ALU : S3; // any alu 4655 %} 4656 4657 // Long ALU reg-reg operation 4658 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4659 instruction_count(2); 4660 dst : S4(write); 4661 src : S3(read); 4662 DECODE : S0(2); // any 2 decoders 4663 ALU : S3(2); // both alus 4664 %} 4665 4666 // Integer ALU reg-reg operation 4667 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4668 single_instruction; 4669 dst : S4(write); 4670 src : S3(read); 4671 D0 : S0; // big decoder only 4672 ALU : S3; // any alu 4673 %} 4674 4675 // Long ALU reg-reg operation 4676 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4677 instruction_count(2); 4678 dst : S4(write); 4679 src : S3(read); 4680 D0 : S0(2); // big decoder only; twice 4681 ALU : S3(2); // both alus 4682 %} 4683 4684 // Integer ALU reg-mem operation 4685 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4686 single_instruction; 4687 dst : S5(write); 4688 mem : S3(read); 4689 D0 : S0; // big decoder only 4690 ALU : S4; // any alu 4691 MEM : S3; // any mem 4692 %} 4693 4694 // Long ALU reg-mem operation 4695 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4696 instruction_count(2); 4697 dst : S5(write); 4698 mem : S3(read); 4699 D0 : S0(2); // big decoder only; twice 4700 ALU : S4(2); // any 2 alus 4701 MEM : S3(2); // both mems 4702 %} 4703 4704 // Integer mem operation (prefetch) 4705 pipe_class ialu_mem(memory mem) 4706 %{ 4707 single_instruction; 4708 mem : S3(read); 4709 D0 : S0; // big decoder only 4710 MEM : S3; // any mem 4711 %} 4712 4713 // Integer Store to Memory 4714 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4715 single_instruction; 4716 mem : S3(read); 4717 src : S5(read); 4718 D0 : S0; // big decoder only 4719 ALU : S4; // any alu 4720 MEM : S3; 4721 %} 4722 4723 // Long Store to Memory 4724 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4725 instruction_count(2); 4726 mem : S3(read); 4727 src : S5(read); 4728 D0 : S0(2); // big decoder only; twice 4729 ALU : S4(2); // any 2 alus 4730 MEM : S3(2); // Both mems 4731 %} 4732 4733 // Integer Store to Memory 4734 pipe_class ialu_mem_imm(memory mem) %{ 4735 single_instruction; 4736 mem : S3(read); 4737 D0 : S0; // big decoder only 4738 ALU : S4; // any alu 4739 MEM : S3; 4740 %} 4741 4742 // Integer ALU0 reg-reg operation 4743 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4744 single_instruction; 4745 dst : S4(write); 4746 src : S3(read); 4747 D0 : S0; // Big decoder only 4748 ALU0 : S3; // only alu0 4749 %} 4750 4751 // Integer ALU0 reg-mem operation 4752 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4753 single_instruction; 4754 dst : S5(write); 4755 mem : S3(read); 4756 D0 : S0; // big decoder only 4757 ALU0 : S4; // ALU0 only 4758 MEM : S3; // any mem 4759 %} 4760 4761 // Integer ALU reg-reg operation 4762 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4763 single_instruction; 4764 cr : S4(write); 4765 src1 : S3(read); 4766 src2 : S3(read); 4767 DECODE : S0; // any decoder 4768 ALU : S3; // any alu 4769 %} 4770 4771 // Integer ALU reg-imm operation 4772 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4773 single_instruction; 4774 cr : S4(write); 4775 src1 : S3(read); 4776 DECODE : S0; // any decoder 4777 ALU : S3; // any alu 4778 %} 4779 4780 // Integer ALU reg-mem operation 4781 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4782 single_instruction; 4783 cr : S4(write); 4784 src1 : S3(read); 4785 src2 : S3(read); 4786 D0 : S0; // big decoder only 4787 ALU : S4; // any alu 4788 MEM : S3; 4789 %} 4790 4791 // Conditional move reg-reg 4792 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4793 instruction_count(4); 4794 y : S4(read); 4795 q : S3(read); 4796 p : S3(read); 4797 DECODE : S0(4); // any decoder 4798 %} 4799 4800 // Conditional move reg-reg 4801 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4802 single_instruction; 4803 dst : S4(write); 4804 src : S3(read); 4805 cr : S3(read); 4806 DECODE : S0; // any decoder 4807 %} 4808 4809 // Conditional move reg-mem 4810 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4811 single_instruction; 4812 dst : S4(write); 4813 src : S3(read); 4814 cr : S3(read); 4815 DECODE : S0; // any decoder 4816 MEM : S3; 4817 %} 4818 4819 // Conditional move reg-reg long 4820 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4821 single_instruction; 4822 dst : S4(write); 4823 src : S3(read); 4824 cr : S3(read); 4825 DECODE : S0(2); // any 2 decoders 4826 %} 4827 4828 // Conditional move double reg-reg 4829 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4830 single_instruction; 4831 dst : S4(write); 4832 src : S3(read); 4833 cr : S3(read); 4834 DECODE : S0; // any decoder 4835 %} 4836 4837 // Float reg-reg operation 4838 pipe_class fpu_reg(regDPR 
dst) %{
  instruction_count(2);
  dst    : S3(read);   // NOTE(review): operand is consumed (FPU stack), hence read — confirm against encodings
  DECODE : S0(2);      // any 2 decoders
  FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
  instruction_count(2);
  dst    : S4(write);
  src    : S3(read);
  DECODE : S0(2);      // any 2 decoders
  FPU    : S3;
%}

// Float reg-reg operation (two sources)
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
  instruction_count(3);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  DECODE : S0(3);      // any 3 decoders
  FPU    : S3(2);
%}

// Float reg-reg operation (three sources)
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  src3   : S3(read);
  DECODE : S0(4);      // any 4 decoders (comment previously said "any 3" — resource is S0(4))
  FPU    : S3(2);
%}

// Float reg-mem-reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  src3   : S3(read);
  DECODE : S1(3);      // any 3 decoders
  D0     : S0;         // Big decoder only
  FPU    : S3(2);
  MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
  instruction_count(2);
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0;         // big decoder only
  DECODE : S1;         // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;         // any mem
%}

// Float reg-reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
  instruction_count(3);
  dst    : S5(write);
  src1   : S3(read);
  mem    : S3(read);
  D0     : S0;         // big decoder only
  DECODE : S1(2);      // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;         // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2); 4914 src : S5(read); 4915 mem : S3(read); 4916 DECODE : S0; // any decoder for FPU PUSH 4917 D0 : S1; // big decoder only 4918 FPU : S4; 4919 MEM : S3; // any mem 4920 %} 4921 4922 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4923 instruction_count(3); 4924 src1 : S3(read); 4925 src2 : S3(read); 4926 mem : S3(read); 4927 DECODE : S0(2); // any decoder for FPU PUSH 4928 D0 : S1; // big decoder only 4929 FPU : S4; 4930 MEM : S3; // any mem 4931 %} 4932 4933 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4934 instruction_count(3); 4935 src1 : S3(read); 4936 src2 : S3(read); 4937 mem : S4(read); 4938 DECODE : S0; // any decoder for FPU PUSH 4939 D0 : S0(2); // big decoder only 4940 FPU : S4; 4941 MEM : S3(2); // any mem 4942 %} 4943 4944 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4945 instruction_count(2); 4946 src1 : S3(read); 4947 dst : S4(read); 4948 D0 : S0(2); // big decoder only 4949 MEM : S3(2); // any mem 4950 %} 4951 4952 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4953 instruction_count(3); 4954 src1 : S3(read); 4955 src2 : S3(read); 4956 dst : S4(read); 4957 D0 : S0(3); // big decoder only 4958 FPU : S4; 4959 MEM : S3(3); // any mem 4960 %} 4961 4962 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4963 instruction_count(3); 4964 src1 : S4(read); 4965 mem : S4(read); 4966 DECODE : S0; // any decoder for FPU PUSH 4967 D0 : S0(2); // big decoder only 4968 FPU : S4; 4969 MEM : S3(2); // any mem 4970 %} 4971 4972 // Float load constant 4973 pipe_class fpu_reg_con(regDPR dst) %{ 4974 instruction_count(2); 4975 dst : S5(write); 4976 D0 : S0; // big decoder only for the load 4977 DECODE : S1; // any decoder for FPU POP 4978 FPU : S4; 4979 MEM : S3; // any mem 4980 %} 4981 4982 // Float load constant 4983 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 4984 instruction_count(3); 4985 dst : S5(write); 4986 src : S3(read); 4987 D0 : S0; // big decoder only for 
the load 4988 DECODE : S1(2); // any decoder for FPU POP 4989 FPU : S4; 4990 MEM : S3; // any mem 4991 %} 4992 4993 // UnConditional branch 4994 pipe_class pipe_jmp( label labl ) %{ 4995 single_instruction; 4996 BR : S3; 4997 %} 4998 4999 // Conditional branch 5000 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5001 single_instruction; 5002 cr : S1(read); 5003 BR : S3; 5004 %} 5005 5006 // Allocation idiom 5007 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5008 instruction_count(1); force_serialization; 5009 fixed_latency(6); 5010 heap_ptr : S3(read); 5011 DECODE : S0(3); 5012 D0 : S2; 5013 MEM : S3; 5014 ALU : S3(2); 5015 dst : S5(write); 5016 BR : S5; 5017 %} 5018 5019 // Generic big/slow expanded idiom 5020 pipe_class pipe_slow( ) %{ 5021 instruction_count(10); multiple_bundles; force_serialization; 5022 fixed_latency(100); 5023 D0 : S0(2); 5024 MEM : S3(2); 5025 %} 5026 5027 // The real do-nothing guy 5028 pipe_class empty( ) %{ 5029 instruction_count(0); 5030 %} 5031 5032 // Define the class for the Nop node 5033 define %{ 5034 MachNop = empty; 5035 %} 5036 5037 %} 5038 5039 //----------INSTRUCTIONS------------------------------------------------------- 5040 // 5041 // match -- States which machine-independent subtree may be replaced 5042 // by this instruction. 5043 // ins_cost -- The estimated cost of this instruction is used by instruction 5044 // selection to identify a minimum cost tree of machine 5045 // instructions that matches a tree of machine-independent 5046 // instructions. 5047 // format -- A string providing the disassembly for this instruction. 5048 // The value of an instruction's operand may be inserted 5049 // by referring to it with a '$' prefix. 5050 // opcode -- Three instruction opcodes may be provided. These are referred 5051 // to within an encode class as $primary, $secondary, and $tertiary 5052 // respectively. 
The primary opcode is commonly used to 5053 // indicate the type of machine instruction, while secondary 5054 // and tertiary are often used for prefix options or addressing 5055 // modes. 5056 // ins_encode -- A list of encode classes with parameters. The encode class 5057 // name must have been defined in an 'enc_class' specification 5058 // in the encode section of the architecture description. 5059 5060 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 5061 // Load Float 5062 instruct MoveF2LEG(legRegF dst, regF src) %{ 5063 match(Set dst src); 5064 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} 5065 ins_encode %{ 5066 ShouldNotReachHere(); 5067 %} 5068 ins_pipe( fpu_reg_reg ); 5069 %} 5070 5071 // Load Float 5072 instruct MoveLEG2F(regF dst, legRegF src) %{ 5073 match(Set dst src); 5074 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} 5075 ins_encode %{ 5076 ShouldNotReachHere(); 5077 %} 5078 ins_pipe( fpu_reg_reg ); 5079 %} 5080 5081 // Load Float 5082 instruct MoveF2VL(vlRegF dst, regF src) %{ 5083 match(Set dst src); 5084 format %{ "movss $dst,$src\t! load float (4 bytes)" %} 5085 ins_encode %{ 5086 ShouldNotReachHere(); 5087 %} 5088 ins_pipe( fpu_reg_reg ); 5089 %} 5090 5091 // Load Float 5092 instruct MoveVL2F(regF dst, vlRegF src) %{ 5093 match(Set dst src); 5094 format %{ "movss $dst,$src\t! 
load float (4 bytes)" %} 5095 ins_encode %{ 5096 ShouldNotReachHere(); 5097 %} 5098 ins_pipe( fpu_reg_reg ); 5099 %} 5100 5101 5102 5103 // Load Double 5104 instruct MoveD2LEG(legRegD dst, regD src) %{ 5105 match(Set dst src); 5106 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} 5107 ins_encode %{ 5108 ShouldNotReachHere(); 5109 %} 5110 ins_pipe( fpu_reg_reg ); 5111 %} 5112 5113 // Load Double 5114 instruct MoveLEG2D(regD dst, legRegD src) %{ 5115 match(Set dst src); 5116 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} 5117 ins_encode %{ 5118 ShouldNotReachHere(); 5119 %} 5120 ins_pipe( fpu_reg_reg ); 5121 %} 5122 5123 // Load Double 5124 instruct MoveD2VL(vlRegD dst, regD src) %{ 5125 match(Set dst src); 5126 format %{ "movsd $dst,$src\t! load double (8 bytes)" %} 5127 ins_encode %{ 5128 ShouldNotReachHere(); 5129 %} 5130 ins_pipe( fpu_reg_reg ); 5131 %} 5132 5133 // Load Double 5134 instruct MoveVL2D(regD dst, vlRegD src) %{ 5135 match(Set dst src); 5136 format %{ "movsd $dst,$src\t! 
load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

//----------BSWAP-Instruction--------------------------------------------------
// Reverse byte order of a 32-bit integer in place (single BSWAP).
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Reverse byte order of a 64-bit long held in a register pair:
// byte-swap each half, then exchange the halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Reverse byte order of an unsigned short: BSWAP puts the two
// interesting bytes in the top half, SHR moves them down zero-extended.
// Fix: dropped the dangling trailing "\n\t" after the final mnemonic so
// the debug disassembly does not emit a stray tabbed blank line
// (matches every other format string in this file).
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);   // SHR clobbers flags

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Reverse byte order of a signed short: as above, but SAR sign-extends.
// Fix: same dangling trailing "\n\t" removed from the format string.
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);   // SAR clobbers flags

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

// Count leading zeros of an int using the LZCNT instruction when available.
instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// BSR-based fallback when LZCNT is not available.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst
(CountLeadingZerosI src)); 5209 effect(KILL cr); 5210 5211 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5212 "JNZ skip\n\t" 5213 "MOV $dst, -1\n" 5214 "skip:\n\t" 5215 "NEG $dst\n\t" 5216 "ADD $dst, 31" %} 5217 ins_encode %{ 5218 Register Rdst = $dst$$Register; 5219 Register Rsrc = $src$$Register; 5220 Label skip; 5221 __ bsrl(Rdst, Rsrc); 5222 __ jccb(Assembler::notZero, skip); 5223 __ movl(Rdst, -1); 5224 __ bind(skip); 5225 __ negl(Rdst); 5226 __ addl(Rdst, BitsPerInt - 1); 5227 %} 5228 ins_pipe(ialu_reg); 5229 %} 5230 5231 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5232 predicate(UseCountLeadingZerosInstruction); 5233 match(Set dst (CountLeadingZerosL src)); 5234 effect(TEMP dst, KILL cr); 5235 5236 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5237 "JNC done\n\t" 5238 "LZCNT $dst, $src.lo\n\t" 5239 "ADD $dst, 32\n" 5240 "done:" %} 5241 ins_encode %{ 5242 Register Rdst = $dst$$Register; 5243 Register Rsrc = $src$$Register; 5244 Label done; 5245 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5246 __ jccb(Assembler::carryClear, done); 5247 __ lzcntl(Rdst, Rsrc); 5248 __ addl(Rdst, BitsPerInt); 5249 __ bind(done); 5250 %} 5251 ins_pipe(ialu_reg); 5252 %} 5253 5254 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5255 predicate(!UseCountLeadingZerosInstruction); 5256 match(Set dst (CountLeadingZerosL src)); 5257 effect(TEMP dst, KILL cr); 5258 5259 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5260 "JZ msw_is_zero\n\t" 5261 "ADD $dst, 32\n\t" 5262 "JMP not_zero\n" 5263 "msw_is_zero:\n\t" 5264 "BSR $dst, $src.lo\n\t" 5265 "JNZ not_zero\n\t" 5266 "MOV $dst, -1\n" 5267 "not_zero:\n\t" 5268 "NEG $dst\n\t" 5269 "ADD $dst, 63\n" %} 5270 ins_encode %{ 5271 Register Rdst = $dst$$Register; 5272 Register Rsrc = $src$$Register; 5273 Label msw_is_zero; 5274 Label not_zero; 5275 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5276 __ jccb(Assembler::zero, msw_is_zero); 5277 __ addl(Rdst, BitsPerInt); 
5278 __ jmpb(not_zero); 5279 __ bind(msw_is_zero); 5280 __ bsrl(Rdst, Rsrc); 5281 __ jccb(Assembler::notZero, not_zero); 5282 __ movl(Rdst, -1); 5283 __ bind(not_zero); 5284 __ negl(Rdst); 5285 __ addl(Rdst, BitsPerLong - 1); 5286 %} 5287 ins_pipe(ialu_reg); 5288 %} 5289 5290 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5291 predicate(UseCountTrailingZerosInstruction); 5292 match(Set dst (CountTrailingZerosI src)); 5293 effect(KILL cr); 5294 5295 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5296 ins_encode %{ 5297 __ tzcntl($dst$$Register, $src$$Register); 5298 %} 5299 ins_pipe(ialu_reg); 5300 %} 5301 5302 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5303 predicate(!UseCountTrailingZerosInstruction); 5304 match(Set dst (CountTrailingZerosI src)); 5305 effect(KILL cr); 5306 5307 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5308 "JNZ done\n\t" 5309 "MOV $dst, 32\n" 5310 "done:" %} 5311 ins_encode %{ 5312 Register Rdst = $dst$$Register; 5313 Label done; 5314 __ bsfl(Rdst, $src$$Register); 5315 __ jccb(Assembler::notZero, done); 5316 __ movl(Rdst, BitsPerInt); 5317 __ bind(done); 5318 %} 5319 ins_pipe(ialu_reg); 5320 %} 5321 5322 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5323 predicate(UseCountTrailingZerosInstruction); 5324 match(Set dst (CountTrailingZerosL src)); 5325 effect(TEMP dst, KILL cr); 5326 5327 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5328 "JNC done\n\t" 5329 "TZCNT $dst, $src.hi\n\t" 5330 "ADD $dst, 32\n" 5331 "done:" %} 5332 ins_encode %{ 5333 Register Rdst = $dst$$Register; 5334 Register Rsrc = $src$$Register; 5335 Label done; 5336 __ tzcntl(Rdst, Rsrc); 5337 __ jccb(Assembler::carryClear, done); 5338 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5339 __ addl(Rdst, BitsPerInt); 5340 __ bind(done); 5341 %} 5342 ins_pipe(ialu_reg); 5343 %} 5344 5345 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5346 
predicate(!UseCountTrailingZerosInstruction); 5347 match(Set dst (CountTrailingZerosL src)); 5348 effect(TEMP dst, KILL cr); 5349 5350 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5351 "JNZ done\n\t" 5352 "BSF $dst, $src.hi\n\t" 5353 "JNZ msw_not_zero\n\t" 5354 "MOV $dst, 32\n" 5355 "msw_not_zero:\n\t" 5356 "ADD $dst, 32\n" 5357 "done:" %} 5358 ins_encode %{ 5359 Register Rdst = $dst$$Register; 5360 Register Rsrc = $src$$Register; 5361 Label msw_not_zero; 5362 Label done; 5363 __ bsfl(Rdst, Rsrc); 5364 __ jccb(Assembler::notZero, done); 5365 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5366 __ jccb(Assembler::notZero, msw_not_zero); 5367 __ movl(Rdst, BitsPerInt); 5368 __ bind(msw_not_zero); 5369 __ addl(Rdst, BitsPerInt); 5370 __ bind(done); 5371 %} 5372 ins_pipe(ialu_reg); 5373 %} 5374 5375 5376 //---------- Population Count Instructions ------------------------------------- 5377 5378 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5379 predicate(UsePopCountInstruction); 5380 match(Set dst (PopCountI src)); 5381 effect(KILL cr); 5382 5383 format %{ "POPCNT $dst, $src" %} 5384 ins_encode %{ 5385 __ popcntl($dst$$Register, $src$$Register); 5386 %} 5387 ins_pipe(ialu_reg); 5388 %} 5389 5390 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5391 predicate(UsePopCountInstruction); 5392 match(Set dst (PopCountI (LoadI mem))); 5393 effect(KILL cr); 5394 5395 format %{ "POPCNT $dst, $mem" %} 5396 ins_encode %{ 5397 __ popcntl($dst$$Register, $mem$$Address); 5398 %} 5399 ins_pipe(ialu_reg); 5400 %} 5401 5402 // Note: Long.bitCount(long) returns an int. 
5403 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 5404 predicate(UsePopCountInstruction); 5405 match(Set dst (PopCountL src)); 5406 effect(KILL cr, TEMP tmp, TEMP dst); 5407 5408 format %{ "POPCNT $dst, $src.lo\n\t" 5409 "POPCNT $tmp, $src.hi\n\t" 5410 "ADD $dst, $tmp" %} 5411 ins_encode %{ 5412 __ popcntl($dst$$Register, $src$$Register); 5413 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 5414 __ addl($dst$$Register, $tmp$$Register); 5415 %} 5416 ins_pipe(ialu_reg); 5417 %} 5418 5419 // Note: Long.bitCount(long) returns an int. 5420 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ 5421 predicate(UsePopCountInstruction); 5422 match(Set dst (PopCountL (LoadL mem))); 5423 effect(KILL cr, TEMP tmp, TEMP dst); 5424 5425 format %{ "POPCNT $dst, $mem\n\t" 5426 "POPCNT $tmp, $mem+4\n\t" 5427 "ADD $dst, $tmp" %} 5428 ins_encode %{ 5429 //__ popcntl($dst$$Register, $mem$$Address$$first); 5430 //__ popcntl($tmp$$Register, $mem$$Address$$second); 5431 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); 5432 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); 5433 __ addl($dst$$Register, $tmp$$Register); 5434 %} 5435 ins_pipe(ialu_reg); 5436 %} 5437 5438 5439 //----------Load/Store/Move Instructions--------------------------------------- 5440 //----------Load Instructions-------------------------------------------------- 5441 // Load Byte (8bit signed) 5442 instruct loadB(xRegI dst, memory mem) %{ 5443 match(Set dst (LoadB mem)); 5444 5445 ins_cost(125); 5446 format %{ "MOVSX8 $dst,$mem\t# byte" %} 5447 5448 ins_encode %{ 5449 __ movsbl($dst$$Register, $mem$$Address); 5450 %} 5451 5452 ins_pipe(ialu_reg_mem); 5453 %} 5454 5455 // Load Byte (8bit signed) into Long Register 5456 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5457 match(Set dst (ConvI2L (LoadB mem))); 5458 effect(KILL 
cr); 5459 5460 ins_cost(375); 5461 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" 5462 "MOV $dst.hi,$dst.lo\n\t" 5463 "SAR $dst.hi,7" %} 5464 5465 ins_encode %{ 5466 __ movsbl($dst$$Register, $mem$$Address); 5467 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5468 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. 5469 %} 5470 5471 ins_pipe(ialu_reg_mem); 5472 %} 5473 5474 // Load Unsigned Byte (8bit UNsigned) 5475 instruct loadUB(xRegI dst, memory mem) %{ 5476 match(Set dst (LoadUB mem)); 5477 5478 ins_cost(125); 5479 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} 5480 5481 ins_encode %{ 5482 __ movzbl($dst$$Register, $mem$$Address); 5483 %} 5484 5485 ins_pipe(ialu_reg_mem); 5486 %} 5487 5488 // Load Unsigned Byte (8 bit UNsigned) into Long Register 5489 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5490 match(Set dst (ConvI2L (LoadUB mem))); 5491 effect(KILL cr); 5492 5493 ins_cost(250); 5494 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" 5495 "XOR $dst.hi,$dst.hi" %} 5496 5497 ins_encode %{ 5498 Register Rdst = $dst$$Register; 5499 __ movzbl(Rdst, $mem$$Address); 5500 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5501 %} 5502 5503 ins_pipe(ialu_reg_mem); 5504 %} 5505 5506 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register 5507 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5508 match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); 5509 effect(KILL cr); 5510 5511 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" 5512 "XOR $dst.hi,$dst.hi\n\t" 5513 "AND $dst.lo,right_n_bits($mask, 8)" %} 5514 ins_encode %{ 5515 Register Rdst = $dst$$Register; 5516 __ movzbl(Rdst, $mem$$Address); 5517 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5518 __ andl(Rdst, $mask$$constant & right_n_bits(8)); 5519 %} 5520 ins_pipe(ialu_reg_mem); 5521 %} 5522 5523 // Load Short (16bit signed) 5524 instruct loadS(rRegI 
dst, memory mem) %{ 5525 match(Set dst (LoadS mem)); 5526 5527 ins_cost(125); 5528 format %{ "MOVSX $dst,$mem\t# short" %} 5529 5530 ins_encode %{ 5531 __ movswl($dst$$Register, $mem$$Address); 5532 %} 5533 5534 ins_pipe(ialu_reg_mem); 5535 %} 5536 5537 // Load Short (16 bit signed) to Byte (8 bit signed) 5538 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5539 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5540 5541 ins_cost(125); 5542 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5543 ins_encode %{ 5544 __ movsbl($dst$$Register, $mem$$Address); 5545 %} 5546 ins_pipe(ialu_reg_mem); 5547 %} 5548 5549 // Load Short (16bit signed) into Long Register 5550 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5551 match(Set dst (ConvI2L (LoadS mem))); 5552 effect(KILL cr); 5553 5554 ins_cost(375); 5555 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5556 "MOV $dst.hi,$dst.lo\n\t" 5557 "SAR $dst.hi,15" %} 5558 5559 ins_encode %{ 5560 __ movswl($dst$$Register, $mem$$Address); 5561 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5562 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 
5563 %} 5564 5565 ins_pipe(ialu_reg_mem); 5566 %} 5567 5568 // Load Unsigned Short/Char (16bit unsigned) 5569 instruct loadUS(rRegI dst, memory mem) %{ 5570 match(Set dst (LoadUS mem)); 5571 5572 ins_cost(125); 5573 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5574 5575 ins_encode %{ 5576 __ movzwl($dst$$Register, $mem$$Address); 5577 %} 5578 5579 ins_pipe(ialu_reg_mem); 5580 %} 5581 5582 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5583 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5584 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5585 5586 ins_cost(125); 5587 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5588 ins_encode %{ 5589 __ movsbl($dst$$Register, $mem$$Address); 5590 %} 5591 ins_pipe(ialu_reg_mem); 5592 %} 5593 5594 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5595 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5596 match(Set dst (ConvI2L (LoadUS mem))); 5597 effect(KILL cr); 5598 5599 ins_cost(250); 5600 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5601 "XOR $dst.hi,$dst.hi" %} 5602 5603 ins_encode %{ 5604 __ movzwl($dst$$Register, $mem$$Address); 5605 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5606 %} 5607 5608 ins_pipe(ialu_reg_mem); 5609 %} 5610 5611 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5612 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5613 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5614 effect(KILL cr); 5615 5616 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5617 "XOR $dst.hi,$dst.hi" %} 5618 ins_encode %{ 5619 Register Rdst = $dst$$Register; 5620 __ movzbl(Rdst, $mem$$Address); 5621 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5622 %} 5623 ins_pipe(ialu_reg_mem); 5624 %} 5625 5626 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5627 instruct 
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5628 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5629 effect(KILL cr); 5630 5631 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5632 "XOR $dst.hi,$dst.hi\n\t" 5633 "AND $dst.lo,right_n_bits($mask, 16)" %} 5634 ins_encode %{ 5635 Register Rdst = $dst$$Register; 5636 __ movzwl(Rdst, $mem$$Address); 5637 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5638 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5639 %} 5640 ins_pipe(ialu_reg_mem); 5641 %} 5642 5643 // Load Integer 5644 instruct loadI(rRegI dst, memory mem) %{ 5645 match(Set dst (LoadI mem)); 5646 5647 ins_cost(125); 5648 format %{ "MOV $dst,$mem\t# int" %} 5649 5650 ins_encode %{ 5651 __ movl($dst$$Register, $mem$$Address); 5652 %} 5653 5654 ins_pipe(ialu_reg_mem); 5655 %} 5656 5657 // Load Integer (32 bit signed) to Byte (8 bit signed) 5658 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5659 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5660 5661 ins_cost(125); 5662 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5663 ins_encode %{ 5664 __ movsbl($dst$$Register, $mem$$Address); 5665 %} 5666 ins_pipe(ialu_reg_mem); 5667 %} 5668 5669 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5670 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5671 match(Set dst (AndI (LoadI mem) mask)); 5672 5673 ins_cost(125); 5674 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5675 ins_encode %{ 5676 __ movzbl($dst$$Register, $mem$$Address); 5677 %} 5678 ins_pipe(ialu_reg_mem); 5679 %} 5680 5681 // Load Integer (32 bit signed) to Short (16 bit signed) 5682 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5683 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5684 5685 ins_cost(125); 5686 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5687 ins_encode %{ 5688 __ movswl($dst$$Register, $mem$$Address); 5689 %} 5690 ins_pipe(ialu_reg_mem); 5691 
%} 5692 5693 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5694 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5695 match(Set dst (AndI (LoadI mem) mask)); 5696 5697 ins_cost(125); 5698 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5699 ins_encode %{ 5700 __ movzwl($dst$$Register, $mem$$Address); 5701 %} 5702 ins_pipe(ialu_reg_mem); 5703 %} 5704 5705 // Load Integer into Long Register 5706 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5707 match(Set dst (ConvI2L (LoadI mem))); 5708 effect(KILL cr); 5709 5710 ins_cost(375); 5711 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5712 "MOV $dst.hi,$dst.lo\n\t" 5713 "SAR $dst.hi,31" %} 5714 5715 ins_encode %{ 5716 __ movl($dst$$Register, $mem$$Address); 5717 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5718 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5719 %} 5720 5721 ins_pipe(ialu_reg_mem); 5722 %} 5723 5724 // Load Integer with mask 0xFF into Long Register 5725 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5726 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5727 effect(KILL cr); 5728 5729 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5730 "XOR $dst.hi,$dst.hi" %} 5731 ins_encode %{ 5732 Register Rdst = $dst$$Register; 5733 __ movzbl(Rdst, $mem$$Address); 5734 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5735 %} 5736 ins_pipe(ialu_reg_mem); 5737 %} 5738 5739 // Load Integer with mask 0xFFFF into Long Register 5740 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5741 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5742 effect(KILL cr); 5743 5744 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5745 "XOR $dst.hi,$dst.hi" %} 5746 ins_encode %{ 5747 Register Rdst = $dst$$Register; 5748 __ movzwl(Rdst, $mem$$Address); 5749 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5750 %} 5751 ins_pipe(ialu_reg_mem); 
5752 %} 5753 5754 // Load Integer with 31-bit mask into Long Register 5755 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{ 5756 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5757 effect(KILL cr); 5758 5759 format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t" 5760 "XOR $dst.hi,$dst.hi\n\t" 5761 "AND $dst.lo,$mask" %} 5762 ins_encode %{ 5763 Register Rdst = $dst$$Register; 5764 __ movl(Rdst, $mem$$Address); 5765 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5766 __ andl(Rdst, $mask$$constant); 5767 %} 5768 ins_pipe(ialu_reg_mem); 5769 %} 5770 5771 // Load Unsigned Integer into Long Register 5772 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{ 5773 match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); 5774 effect(KILL cr); 5775 5776 ins_cost(250); 5777 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" 5778 "XOR $dst.hi,$dst.hi" %} 5779 5780 ins_encode %{ 5781 __ movl($dst$$Register, $mem$$Address); 5782 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5783 %} 5784 5785 ins_pipe(ialu_reg_mem); 5786 %} 5787 5788 // Load Long. Cannot clobber address while loading, so restrict address 5789 // register to ESI 5790 instruct loadL(eRegL dst, load_long_memory mem) %{ 5791 predicate(!((LoadLNode*)n)->require_atomic_access()); 5792 match(Set dst (LoadL mem)); 5793 5794 ins_cost(250); 5795 format %{ "MOV $dst.lo,$mem\t# long\n\t" 5796 "MOV $dst.hi,$mem+4" %} 5797 5798 ins_encode %{ 5799 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); 5800 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); 5801 __ movl($dst$$Register, Amemlo); 5802 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); 5803 %} 5804 5805 ins_pipe(ialu_reg_long_mem); 5806 %} 5807 5808 // Volatile Load Long. Must be atomic, so do 64-bit FILD 5809 // then store it down to the stack and reload on the int 5810 // side. 
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Volatile long load with SSE2: stage the 8-byte value through an XMM
// temp into the destination stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Volatile long load with SSE2 straight into a GPR pair: split the
// 64-bit XMM value into lo/hi halves with MOVD plus a 32-bit shift.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 path)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Variant used when UseXmmLoadAndClearUpper is off; format shows MOVLPD
// (movdbl() selects the actual encoding based on that flag).
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 path)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
// One LEA variant per addressing-mode operand class.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// XOR reg,reg is shorter than MOV reg,0 but clobbers the flags.
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33); /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

// Load pointer constant
instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8); /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant: two 32-bit immediate moves.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load long zero: XOR both halves (clobbers flags).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// Float 0.0: XORPS the destination with itself (no constant-table load).
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
// Double 0.0: XORPD the destination with itself (no constant-table load).
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load long stack slot: two 32-bit moves for the lo/hi halves.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// Variant selected by the AllocatePrefetchInstr flag (and SSE level).

instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
// NOTE(review): xRegI presumably restricts src to byte-addressable
// registers (EAX/EBX/ECX/EDX) — confirm against the operand definition.
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long (non-atomic: two 32-bit stores)
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// Only the low half of the long is stored; ConvL2I discards the high word.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.
// Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: atomic 8-byte store via an XMM temp; the CMP probes the
// address first so a null pointer faults before the FP/XMM store.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant taking the source from a GPR pair: assemble the 64-bit
// value in an XMM register (MOVD lo/hi + PUNPCKLDQ), then store it.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  // Guarded by a flag: 16-bit immediate stores are slow on some CPUs.
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87 path; src must be on top of the FP stack)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// Store Float (x87 path; src must be on top of the FP stack)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// Fused double-to-float conversion: FST_S narrows the double on store.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

// Acquire barrier: no instruction needed on x86 (loads are not
// reordered with other loads), so the encoding is empty.
instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Release barrier: also empty on x86 (stores are not reordered with
// other stores).
instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full fence: only StoreLoad needs a real instruction on x86; emitted
// as a locked ADD to the top of stack.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  // A preceding locked instruction already provides the StoreLoad fence.
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------

// Raw word to pointer: both operands are pinned to EAX, so no code is
// emitted — the "move" is purely a retyping.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
// Pre-CMOV hardware: emulate with a short branch around a MOV.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned-compare flavor of the branch emulation above.
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// UCF (unordered-compare flags) variant delegates to the unsigned form.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP
// is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// UCF variant delegates to the unsigned pointer cmove.
instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move (x87 FCMOV; requires unordered-compare flags)
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move (x87 FCMOV, single precision)
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed compare: FCMOV is unavailable, so branch around an FP reg move.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format
%{ "CMOV$cop $dst.lo,$src.lo\n\t" 7036 "CMOV$cop $dst.hi,$src.hi" %} 7037 opcode(0x0F,0x40); 7038 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 7039 ins_pipe( pipe_cmov_reg_long ); 7040 %} 7041 7042 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{ 7043 predicate(VM_Version::supports_cmov() ); 7044 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7045 ins_cost(200); 7046 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 7047 "CMOV$cop $dst.hi,$src.hi" %} 7048 opcode(0x0F,0x40); 7049 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 7050 ins_pipe( pipe_cmov_reg_long ); 7051 %} 7052 7053 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{ 7054 predicate(VM_Version::supports_cmov() ); 7055 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7056 ins_cost(200); 7057 expand %{ 7058 cmovL_regU(cop, cr, dst, src); 7059 %} 7060 %} 7061 7062 //----------Arithmetic Instructions-------------------------------------------- 7063 //----------Addition Instructions---------------------------------------------- 7064 7065 // Integer Addition Instructions 7066 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7067 match(Set dst (AddI dst src)); 7068 effect(KILL cr); 7069 7070 size(2); 7071 format %{ "ADD $dst,$src" %} 7072 opcode(0x03); 7073 ins_encode( OpcP, RegReg( dst, src) ); 7074 ins_pipe( ialu_reg_reg ); 7075 %} 7076 7077 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7078 match(Set dst (AddI dst src)); 7079 effect(KILL cr); 7080 7081 format %{ "ADD $dst,$src" %} 7082 opcode(0x81, 0x00); /* /0 id */ 7083 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7084 ins_pipe( ialu_reg ); 7085 %} 7086 7087 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{ 7088 predicate(UseIncDec); 7089 match(Set dst (AddI dst src)); 7090 effect(KILL cr); 7091 7092 size(1); 7093 format %{ "INC $dst" %} 7094 opcode(0x40); /* */ 7095 
ins_encode( Opc_plus( primary, dst ) ); 7096 ins_pipe( ialu_reg ); 7097 %} 7098 7099 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ 7100 match(Set dst (AddI src0 src1)); 7101 ins_cost(110); 7102 7103 format %{ "LEA $dst,[$src0 + $src1]" %} 7104 opcode(0x8D); /* 0x8D /r */ 7105 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7106 ins_pipe( ialu_reg_reg ); 7107 %} 7108 7109 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ 7110 match(Set dst (AddP src0 src1)); 7111 ins_cost(110); 7112 7113 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} 7114 opcode(0x8D); /* 0x8D /r */ 7115 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7116 ins_pipe( ialu_reg_reg ); 7117 %} 7118 7119 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ 7120 predicate(UseIncDec); 7121 match(Set dst (AddI dst src)); 7122 effect(KILL cr); 7123 7124 size(1); 7125 format %{ "DEC $dst" %} 7126 opcode(0x48); /* */ 7127 ins_encode( Opc_plus( primary, dst ) ); 7128 ins_pipe( ialu_reg ); 7129 %} 7130 7131 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ 7132 match(Set dst (AddP dst src)); 7133 effect(KILL cr); 7134 7135 size(2); 7136 format %{ "ADD $dst,$src" %} 7137 opcode(0x03); 7138 ins_encode( OpcP, RegReg( dst, src) ); 7139 ins_pipe( ialu_reg_reg ); 7140 %} 7141 7142 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ 7143 match(Set dst (AddP dst src)); 7144 effect(KILL cr); 7145 7146 format %{ "ADD $dst,$src" %} 7147 opcode(0x81,0x00); /* Opcode 81 /0 id */ 7148 // ins_encode( RegImm( dst, src) ); 7149 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7150 ins_pipe( ialu_reg ); 7151 %} 7152 7153 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7154 match(Set dst (AddI dst (LoadI src))); 7155 effect(KILL cr); 7156 7157 ins_cost(150); 7158 format %{ "ADD $dst,$src" %} 7159 opcode(0x03); 7160 ins_encode( OpcP, RegMem( dst, src) ); 7161 ins_pipe( ialu_reg_mem ); 7162 %} 7163 7164 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 
7165 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7166 effect(KILL cr);
7167
7168 ins_cost(150);
7169 format %{ "ADD $dst,$src" %}
7170 opcode(0x01); /* Opcode 01 /r */
7171 ins_encode( OpcP, RegMem( src, dst ) );
7172 ins_pipe( ialu_mem_reg );
7173 %}
7174
7175 // Add Memory with Immediate
// Read-modify-write form: load + add + store matched as one memory-operand
// instruction.  Flags are clobbered (KILL cr) by the ADD.
7176 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7177 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7178 effect(KILL cr);
7179
7180 ins_cost(125);
7181 format %{ "ADD $dst,$src" %}
7182 opcode(0x81); /* Opcode 81 /0 id */
7183 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7184 ins_pipe( ialu_mem_imm );
7185 %}
7186
// Add of constant +1 to memory: uses the shorter INC encoding (FF /0).
7187 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
7188 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7189 effect(KILL cr);
7190
7191 ins_cost(125);
7192 format %{ "INC $dst" %}
7193 opcode(0xFF); /* Opcode FF /0 */
7194 ins_encode( OpcP, RMopc_Mem(0x00,dst));
7195 ins_pipe( ialu_mem_imm );
7196 %}
7197
// Add of constant -1 to memory: uses the shorter DEC encoding (FF /1).
7198 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7199 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7200 effect(KILL cr);
7201
7202 ins_cost(125);
7203 format %{ "DEC $dst" %}
7204 opcode(0xFF); /* Opcode FF /1 */
7205 ins_encode( OpcP, RMopc_Mem(0x01,dst));
7206 ins_pipe( ialu_mem_imm );
7207 %}
7208
7209
// Cast pseudo-instructions: empty encodings (size(0)/ins_cost(0) where
// given) -- they carry type information for the compiler and emit no code.
7210 instruct checkCastPP( eRegP dst ) %{
7211 match(Set dst (CheckCastPP dst));
7212
7213 size(0);
7214 format %{ "#checkcastPP of $dst" %}
7215 ins_encode( /*empty encoding*/ );
7216 ins_pipe( empty );
7217 %}
7218
7219 instruct castPP( eRegP dst ) %{
7220 match(Set dst (CastPP dst));
7221 format %{ "#castPP of $dst" %}
7222 ins_encode( /*empty encoding*/ );
7223 ins_pipe( empty );
7224 %}
7225
7226 instruct castII( rRegI dst ) %{
7227 match(Set dst (CastII dst));
7228 format %{ "#castII of $dst" %}
7229 ins_encode( /*empty encoding*/ );
7230 ins_cost(0);
7231 ins_pipe( empty );
7232 %}
7233
7234 instruct castLL( eRegL dst ) %{
7235 match(Set dst (CastLL dst));
7236 format %{
"#castLL of $dst" %} 7237 ins_encode( /*empty encoding*/ ); 7238 ins_cost(0); 7239 ins_pipe( empty ); 7240 %} 7241 7242 instruct castFF( regF dst ) %{ 7243 predicate(UseSSE >= 1); 7244 match(Set dst (CastFF dst)); 7245 format %{ "#castFF of $dst" %} 7246 ins_encode( /*empty encoding*/ ); 7247 ins_cost(0); 7248 ins_pipe( empty ); 7249 %} 7250 7251 instruct castDD( regD dst ) %{ 7252 predicate(UseSSE >= 2); 7253 match(Set dst (CastDD dst)); 7254 format %{ "#castDD of $dst" %} 7255 ins_encode( /*empty encoding*/ ); 7256 ins_cost(0); 7257 ins_pipe( empty ); 7258 %} 7259 7260 instruct castFF_PR( regFPR dst ) %{ 7261 predicate(UseSSE < 1); 7262 match(Set dst (CastFF dst)); 7263 format %{ "#castFF of $dst" %} 7264 ins_encode( /*empty encoding*/ ); 7265 ins_cost(0); 7266 ins_pipe( empty ); 7267 %} 7268 7269 instruct castDD_PR( regDPR dst ) %{ 7270 predicate(UseSSE < 2); 7271 match(Set dst (CastDD dst)); 7272 format %{ "#castDD of $dst" %} 7273 ins_encode( /*empty encoding*/ ); 7274 ins_cost(0); 7275 ins_pipe( empty ); 7276 %} 7277 7278 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7279 7280 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7281 predicate(VM_Version::supports_cx8()); 7282 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7283 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval))); 7284 effect(KILL cr, KILL oldval); 7285 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7286 "MOV $res,0\n\t" 7287 "JNE,s fail\n\t" 7288 "MOV $res,1\n" 7289 "fail:" %} 7290 ins_encode( enc_cmpxchg8(mem_ptr), 7291 enc_flags_ne_to_boolean(res) ); 7292 ins_pipe( pipe_cmpxchg ); 7293 %} 7294 7295 instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7296 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 7297 match(Set res (WeakCompareAndSwapP mem_ptr 
(Binary oldval newval))); 7298 effect(KILL cr, KILL oldval); 7299 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7300 "MOV $res,0\n\t" 7301 "JNE,s fail\n\t" 7302 "MOV $res,1\n" 7303 "fail:" %} 7304 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7305 ins_pipe( pipe_cmpxchg ); 7306 %} 7307 7308 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7309 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval))); 7310 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval))); 7311 effect(KILL cr, KILL oldval); 7312 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7313 "MOV $res,0\n\t" 7314 "JNE,s fail\n\t" 7315 "MOV $res,1\n" 7316 "fail:" %} 7317 ins_encode( enc_cmpxchgb(mem_ptr), 7318 enc_flags_ne_to_boolean(res) ); 7319 ins_pipe( pipe_cmpxchg ); 7320 %} 7321 7322 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7323 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval))); 7324 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval))); 7325 effect(KILL cr, KILL oldval); 7326 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7327 "MOV $res,0\n\t" 7328 "JNE,s fail\n\t" 7329 "MOV $res,1\n" 7330 "fail:" %} 7331 ins_encode( enc_cmpxchgw(mem_ptr), 7332 enc_flags_ne_to_boolean(res) ); 7333 ins_pipe( pipe_cmpxchg ); 7334 %} 7335 7336 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7337 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 7338 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval))); 7339 effect(KILL cr, KILL oldval); 7340 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7341 "MOV $res,0\n\t" 7342 "JNE,s fail\n\t" 7343 "MOV 
$res,1\n" 7344 "fail:" %} 7345 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7346 ins_pipe( pipe_cmpxchg ); 7347 %} 7348 7349 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7350 predicate(VM_Version::supports_cx8()); 7351 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval))); 7352 effect(KILL cr); 7353 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7354 ins_encode( enc_cmpxchg8(mem_ptr) ); 7355 ins_pipe( pipe_cmpxchg ); 7356 %} 7357 7358 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7359 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); 7360 effect(KILL cr); 7361 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7362 ins_encode( enc_cmpxchg(mem_ptr) ); 7363 ins_pipe( pipe_cmpxchg ); 7364 %} 7365 7366 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7367 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval))); 7368 effect(KILL cr); 7369 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7370 ins_encode( enc_cmpxchgb(mem_ptr) ); 7371 ins_pipe( pipe_cmpxchg ); 7372 %} 7373 7374 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7375 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval))); 7376 effect(KILL cr); 7377 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7378 ins_encode( enc_cmpxchgw(mem_ptr) ); 7379 ins_pipe( pipe_cmpxchg ); 7380 %} 7381 7382 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7383 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval))); 7384 effect(KILL cr); 7385 format %{ "CMPXCHG 
[$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7386 ins_encode( enc_cmpxchg(mem_ptr) );
7387 ins_pipe( pipe_cmpxchg );
7388 %}
7389
// Atomic get-and-add family.  When the fetched result is unused
// (result_not_used() predicate) a plain LOCK ADD to memory suffices;
// otherwise LOCK XADD returns the previous memory value in the register.
7390 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7391 predicate(n->as_LoadStore()->result_not_used());
7392 match(Set dummy (GetAndAddB mem add));
7393 effect(KILL cr);
7394 format %{ "ADDB [$mem],$add" %}
7395 ins_encode %{
7396 __ lock();
7397 __ addb($mem$$Address, $add$$constant);
7398 %}
7399 ins_pipe( pipe_cmpxchg );
7400 %}
7401
7402 // Important to match to xRegI: only 8-bit regs.
7403 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7404 match(Set newval (GetAndAddB mem newval));
7405 effect(KILL cr);
7406 format %{ "XADDB [$mem],$newval" %}
7407 ins_encode %{
7408 __ lock();
7409 __ xaddb($mem$$Address, $newval$$Register);
7410 %}
7411 ins_pipe( pipe_cmpxchg );
7412 %}
7413
7414 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7415 predicate(n->as_LoadStore()->result_not_used());
7416 match(Set dummy (GetAndAddS mem add));
7417 effect(KILL cr);
7418 format %{ "ADDS [$mem],$add" %}
7419 ins_encode %{
7420 __ lock();
7421 __ addw($mem$$Address, $add$$constant);
7422 %}
7423 ins_pipe( pipe_cmpxchg );
7424 %}
7425
7426 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7427 match(Set newval (GetAndAddS mem newval));
7428 effect(KILL cr);
7429 format %{ "XADDS [$mem],$newval" %}
7430 ins_encode %{
7431 __ lock();
7432 __ xaddw($mem$$Address, $newval$$Register);
7433 %}
7434 ins_pipe( pipe_cmpxchg );
7435 %}
7436
7437 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7438 predicate(n->as_LoadStore()->result_not_used());
7439 match(Set dummy (GetAndAddI mem add));
7440 effect(KILL cr);
7441 format %{ "ADDL [$mem],$add" %}
7442 ins_encode %{
7443 __ lock();
7444 __ addl($mem$$Address, $add$$constant);
7445 %}
7446 ins_pipe( pipe_cmpxchg );
7447 %}
7448
7449 instruct xaddI( memory mem, rRegI
newval, eFlagsReg cr) %{ 7450 match(Set newval (GetAndAddI mem newval)); 7451 effect(KILL cr); 7452 format %{ "XADDL [$mem],$newval" %} 7453 ins_encode %{ 7454 __ lock(); 7455 __ xaddl($mem$$Address, $newval$$Register); 7456 %} 7457 ins_pipe( pipe_cmpxchg ); 7458 %} 7459 7460 // Important to match to xRegI: only 8-bit regs. 7461 instruct xchgB( memory mem, xRegI newval) %{ 7462 match(Set newval (GetAndSetB mem newval)); 7463 format %{ "XCHGB $newval,[$mem]" %} 7464 ins_encode %{ 7465 __ xchgb($newval$$Register, $mem$$Address); 7466 %} 7467 ins_pipe( pipe_cmpxchg ); 7468 %} 7469 7470 instruct xchgS( memory mem, rRegI newval) %{ 7471 match(Set newval (GetAndSetS mem newval)); 7472 format %{ "XCHGW $newval,[$mem]" %} 7473 ins_encode %{ 7474 __ xchgw($newval$$Register, $mem$$Address); 7475 %} 7476 ins_pipe( pipe_cmpxchg ); 7477 %} 7478 7479 instruct xchgI( memory mem, rRegI newval) %{ 7480 match(Set newval (GetAndSetI mem newval)); 7481 format %{ "XCHGL $newval,[$mem]" %} 7482 ins_encode %{ 7483 __ xchgl($newval$$Register, $mem$$Address); 7484 %} 7485 ins_pipe( pipe_cmpxchg ); 7486 %} 7487 7488 instruct xchgP( memory mem, pRegP newval) %{ 7489 match(Set newval (GetAndSetP mem newval)); 7490 format %{ "XCHGL $newval,[$mem]" %} 7491 ins_encode %{ 7492 __ xchgl($newval$$Register, $mem$$Address); 7493 %} 7494 ins_pipe( pipe_cmpxchg ); 7495 %} 7496 7497 //----------Subtraction Instructions------------------------------------------- 7498 7499 // Integer Subtraction Instructions 7500 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7501 match(Set dst (SubI dst src)); 7502 effect(KILL cr); 7503 7504 size(2); 7505 format %{ "SUB $dst,$src" %} 7506 opcode(0x2B); 7507 ins_encode( OpcP, RegReg( dst, src) ); 7508 ins_pipe( ialu_reg_reg ); 7509 %} 7510 7511 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7512 match(Set dst (SubI dst src)); 7513 effect(KILL cr); 7514 7515 format %{ "SUB $dst,$src" %} 7516 opcode(0x81,0x05); /* Opcode 81 /5 */ 7517 // ins_encode( 
RegImm( dst, src) ); 7518 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7519 ins_pipe( ialu_reg ); 7520 %} 7521 7522 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7523 match(Set dst (SubI dst (LoadI src))); 7524 effect(KILL cr); 7525 7526 ins_cost(150); 7527 format %{ "SUB $dst,$src" %} 7528 opcode(0x2B); 7529 ins_encode( OpcP, RegMem( dst, src) ); 7530 ins_pipe( ialu_reg_mem ); 7531 %} 7532 7533 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7534 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 7535 effect(KILL cr); 7536 7537 ins_cost(150); 7538 format %{ "SUB $dst,$src" %} 7539 opcode(0x29); /* Opcode 29 /r */ 7540 ins_encode( OpcP, RegMem( src, dst ) ); 7541 ins_pipe( ialu_mem_reg ); 7542 %} 7543 7544 // Subtract from a pointer 7545 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{ 7546 match(Set dst (AddP dst (SubI zero src))); 7547 effect(KILL cr); 7548 7549 size(2); 7550 format %{ "SUB $dst,$src" %} 7551 opcode(0x2B); 7552 ins_encode( OpcP, RegReg( dst, src) ); 7553 ins_pipe( ialu_reg_reg ); 7554 %} 7555 7556 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{ 7557 match(Set dst (SubI zero dst)); 7558 effect(KILL cr); 7559 7560 size(2); 7561 format %{ "NEG $dst" %} 7562 opcode(0xF7,0x03); // Opcode F7 /3 7563 ins_encode( OpcP, RegOpc( dst ) ); 7564 ins_pipe( ialu_reg ); 7565 %} 7566 7567 //----------Multiplication/Division Instructions------------------------------- 7568 // Integer Multiplication Instructions 7569 // Multiply Register 7570 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7571 match(Set dst (MulI dst src)); 7572 effect(KILL cr); 7573 7574 size(3); 7575 ins_cost(300); 7576 format %{ "IMUL $dst,$src" %} 7577 opcode(0xAF, 0x0F); 7578 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 7579 ins_pipe( ialu_reg_reg_alu0 ); 7580 %} 7581 7582 // Multiply 32-bit Immediate 7583 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ 7584 match(Set dst (MulI src 
imm)); 7585 effect(KILL cr); 7586 7587 ins_cost(300); 7588 format %{ "IMUL $dst,$src,$imm" %} 7589 opcode(0x69); /* 69 /r id */ 7590 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 7591 ins_pipe( ialu_reg_reg_alu0 ); 7592 %} 7593 7594 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 7595 match(Set dst src); 7596 effect(KILL cr); 7597 7598 // Note that this is artificially increased to make it more expensive than loadConL 7599 ins_cost(250); 7600 format %{ "MOV EAX,$src\t// low word only" %} 7601 opcode(0xB8); 7602 ins_encode( LdImmL_Lo(dst, src) ); 7603 ins_pipe( ialu_reg_fat ); 7604 %} 7605 7606 // Multiply by 32-bit Immediate, taking the shifted high order results 7607 // (special case for shift by 32) 7608 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 7609 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7610 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7611 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7612 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7613 effect(USE src1, KILL cr); 7614 7615 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7616 ins_cost(0*100 + 1*400 - 150); 7617 format %{ "IMUL EDX:EAX,$src1" %} 7618 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7619 ins_pipe( pipe_slow ); 7620 %} 7621 7622 // Multiply by 32-bit Immediate, taking the shifted high order results 7623 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 7624 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7625 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7626 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7627 
_kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7628 effect(USE src1, KILL cr); 7629 7630 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7631 ins_cost(1*100 + 1*400 - 150); 7632 format %{ "IMUL EDX:EAX,$src1\n\t" 7633 "SAR EDX,$cnt-32" %} 7634 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7635 ins_pipe( pipe_slow ); 7636 %} 7637 7638 // Multiply Memory 32-bit Immediate 7639 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ 7640 match(Set dst (MulI (LoadI src) imm)); 7641 effect(KILL cr); 7642 7643 ins_cost(300); 7644 format %{ "IMUL $dst,$src,$imm" %} 7645 opcode(0x69); /* 69 /r id */ 7646 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); 7647 ins_pipe( ialu_reg_mem_alu0 ); 7648 %} 7649 7650 // Multiply Memory 7651 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ 7652 match(Set dst (MulI dst (LoadI src))); 7653 effect(KILL cr); 7654 7655 ins_cost(350); 7656 format %{ "IMUL $dst,$src" %} 7657 opcode(0xAF, 0x0F); 7658 ins_encode( OpcS, OpcP, RegMem( dst, src) ); 7659 ins_pipe( ialu_reg_mem_alu0 ); 7660 %} 7661 7662 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr) 7663 %{ 7664 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3))); 7665 effect(KILL cr, KILL src2); 7666 7667 expand %{ mulI_eReg(dst, src1, cr); 7668 mulI_eReg(src2, src3, cr); 7669 addI_eReg(dst, src2, cr); %} 7670 %} 7671 7672 // Multiply Register Int to Long 7673 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 7674 // Basic Idea: long = (long)int * (long)int 7675 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 7676 effect(DEF dst, USE src, USE src1, KILL flags); 7677 7678 ins_cost(300); 7679 format %{ "IMUL $dst,$src1" %} 7680 7681 ins_encode( long_int_multiply( dst, src1 ) ); 7682 ins_pipe( ialu_reg_reg_alu0 ); 7683 %} 7684 7685 instruct mulIS_eReg(eADXRegL dst, 
immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 7686 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 7687 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 7688 effect(KILL flags); 7689 7690 ins_cost(300); 7691 format %{ "MUL $dst,$src1" %} 7692 7693 ins_encode( long_uint_multiply(dst, src1) ); 7694 ins_pipe( ialu_reg_reg_alu0 ); 7695 %} 7696 7697 // Multiply Register Long 7698 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7699 match(Set dst (MulL dst src)); 7700 effect(KILL cr, TEMP tmp); 7701 ins_cost(4*100+3*400); 7702 // Basic idea: lo(result) = lo(x_lo * y_lo) 7703 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 7704 format %{ "MOV $tmp,$src.lo\n\t" 7705 "IMUL $tmp,EDX\n\t" 7706 "MOV EDX,$src.hi\n\t" 7707 "IMUL EDX,EAX\n\t" 7708 "ADD $tmp,EDX\n\t" 7709 "MUL EDX:EAX,$src.lo\n\t" 7710 "ADD EDX,$tmp" %} 7711 ins_encode( long_multiply( dst, src, tmp ) ); 7712 ins_pipe( pipe_slow ); 7713 %} 7714 7715 // Multiply Register Long where the left operand's high 32 bits are zero 7716 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7717 predicate(is_operand_hi32_zero(n->in(1))); 7718 match(Set dst (MulL dst src)); 7719 effect(KILL cr, TEMP tmp); 7720 ins_cost(2*100+2*400); 7721 // Basic idea: lo(result) = lo(x_lo * y_lo) 7722 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 7723 format %{ "MOV $tmp,$src.hi\n\t" 7724 "IMUL $tmp,EAX\n\t" 7725 "MUL EDX:EAX,$src.lo\n\t" 7726 "ADD EDX,$tmp" %} 7727 ins_encode %{ 7728 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 7729 __ imull($tmp$$Register, rax); 7730 __ mull($src$$Register); 7731 __ addl(rdx, $tmp$$Register); 7732 %} 7733 ins_pipe( pipe_slow ); 7734 %} 7735 7736 // Multiply Register Long where the right operand's high 32 bits are zero 7737 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7738 
predicate(is_operand_hi32_zero(n->in(2))); 7739 match(Set dst (MulL dst src)); 7740 effect(KILL cr, TEMP tmp); 7741 ins_cost(2*100+2*400); 7742 // Basic idea: lo(result) = lo(x_lo * y_lo) 7743 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 7744 format %{ "MOV $tmp,$src.lo\n\t" 7745 "IMUL $tmp,EDX\n\t" 7746 "MUL EDX:EAX,$src.lo\n\t" 7747 "ADD EDX,$tmp" %} 7748 ins_encode %{ 7749 __ movl($tmp$$Register, $src$$Register); 7750 __ imull($tmp$$Register, rdx); 7751 __ mull($src$$Register); 7752 __ addl(rdx, $tmp$$Register); 7753 %} 7754 ins_pipe( pipe_slow ); 7755 %} 7756 7757 // Multiply Register Long where the left and the right operands' high 32 bits are zero 7758 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ 7759 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); 7760 match(Set dst (MulL dst src)); 7761 effect(KILL cr); 7762 ins_cost(1*400); 7763 // Basic idea: lo(result) = lo(x_lo * y_lo) 7764 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 7765 format %{ "MUL EDX:EAX,$src.lo\n\t" %} 7766 ins_encode %{ 7767 __ mull($src$$Register); 7768 %} 7769 ins_pipe( pipe_slow ); 7770 %} 7771 7772 // Multiply Register Long by small constant 7773 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ 7774 match(Set dst (MulL dst src)); 7775 effect(KILL cr, TEMP tmp); 7776 ins_cost(2*100+2*400); 7777 size(12); 7778 // Basic idea: lo(result) = lo(src * EAX) 7779 // hi(result) = hi(src * EAX) + lo(src * EDX) 7780 format %{ "IMUL $tmp,EDX,$src\n\t" 7781 "MOV EDX,$src\n\t" 7782 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 7783 "ADD EDX,$tmp" %} 7784 ins_encode( long_multiply_con( dst, src, tmp ) ); 7785 ins_pipe( pipe_slow ); 7786 %} 7787 7788 // Integer DIV with Register 7789 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7790 match(Set rax (DivI rax div)); 7791 effect(KILL rdx, KILL cr); 
7792 size(26);
7793 ins_cost(30*100+10*100);
// Guard against min_jint / -1, which would raise #DE in IDIV: the
// CMP/JNE/XOR/CMP/JE sequence below short-circuits that one case
// (quotient EAX left unchanged, remainder EDX zeroed) before CDQ + IDIV.
7794 format %{ "CMP EAX,0x80000000\n\t"
7795 "JNE,s normal\n\t"
7796 "XOR EDX,EDX\n\t"
7797 "CMP ECX,-1\n\t"
7798 "JE,s done\n"
7799 "normal: CDQ\n\t"
7800 "IDIV $div\n\t"
7801 "done:" %}
7802 opcode(0xF7, 0x7); /* Opcode F7 /7 */
7803 ins_encode( cdq_enc, OpcP, RegOpc(div) );
7804 ins_pipe( ialu_reg_reg_alu0 );
7805 %}
7806
7807 // Divide Register Long
// 64-bit division is done out of line via a runtime call (effect(CALL)):
// both operands are pushed and SharedRuntime::ldiv does the work.
7808 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
7809 match(Set dst (DivL src1 src2));
7810 effect(CALL);
7811 ins_cost(10000);
7812 format %{ "PUSH $src1.hi\n\t"
7813 "PUSH $src1.lo\n\t"
7814 "PUSH $src2.hi\n\t"
7815 "PUSH $src2.lo\n\t"
7816 "CALL SharedRuntime::ldiv\n\t"
7817 "ADD ESP,16" %}
7818 ins_encode( long_div(src1,src2) );
7819 ins_pipe( pipe_slow );
7820 %}
7821
7822 // Integer DIVMOD with Register, both quotient and mod results
// Same min_jint/-1 guard as divI_eReg; one IDIV yields quotient in EAX
// and remainder in EDX simultaneously.
7823 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7824 match(DivModI rax div);
7825 effect(KILL cr);
7826 size(26);
7827 ins_cost(30*100+10*100);
7828 format %{ "CMP EAX,0x80000000\n\t"
7829 "JNE,s normal\n\t"
7830 "XOR EDX,EDX\n\t"
7831 "CMP ECX,-1\n\t"
7832 "JE,s done\n"
7833 "normal: CDQ\n\t"
7834 "IDIV $div\n\t"
7835 "done:" %}
7836 opcode(0xF7, 0x7); /* Opcode F7 /7 */
7837 ins_encode( cdq_enc, OpcP, RegOpc(div) );
7838 ins_pipe( pipe_slow );
7839 %}
7840
7841 // Integer MOD with Register
// NOTE(review): the format text here shows only CDQ + IDIV, but this uses
// the same cdq_enc encoding and size(26) as the guarded divide forms above,
// which suggests the min_jint/-1 guard is emitted by cdq_enc itself --
// confirm against the cdq_enc definition earlier in the file.
7842 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7843 match(Set rdx (ModI rax div));
7844 effect(KILL rax, KILL cr);
7845
7846 size(26);
7847 ins_cost(300);
7848 format %{ "CDQ\n\t"
7849 "IDIV $div" %}
7850 opcode(0xF7, 0x7); /* Opcode F7 /7 */
7851 ins_encode( cdq_enc, OpcP, RegOpc(div) );
7852 ins_pipe( ialu_reg_reg_alu0 );
7853 %}
7854
7855 // Remainder Register Long
// 64-bit remainder, like divL_eReg, goes out of line to the runtime.
7856 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
7857 match(Set dst (ModL src1 src2));
7858 effect(CALL);
7859 ins_cost(10000);
7860 format %{ "PUSH $src1.hi\n\t"
7861 "PUSH
$src1.lo\n\t" 7862 "PUSH $src2.hi\n\t" 7863 "PUSH $src2.lo\n\t" 7864 "CALL SharedRuntime::lrem\n\t" 7865 "ADD ESP,16" %} 7866 ins_encode( long_mod(src1,src2) ); 7867 ins_pipe( pipe_slow ); 7868 %} 7869 7870 // Divide Register Long (no special case since divisor != -1) 7871 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7872 match(Set dst (DivL dst imm)); 7873 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7874 ins_cost(1000); 7875 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" 7876 "XOR $tmp2,$tmp2\n\t" 7877 "CMP $tmp,EDX\n\t" 7878 "JA,s fast\n\t" 7879 "MOV $tmp2,EAX\n\t" 7880 "MOV EAX,EDX\n\t" 7881 "MOV EDX,0\n\t" 7882 "JLE,s pos\n\t" 7883 "LNEG EAX : $tmp2\n\t" 7884 "DIV $tmp # unsigned division\n\t" 7885 "XCHG EAX,$tmp2\n\t" 7886 "DIV $tmp\n\t" 7887 "LNEG $tmp2 : EAX\n\t" 7888 "JMP,s done\n" 7889 "pos:\n\t" 7890 "DIV $tmp\n\t" 7891 "XCHG EAX,$tmp2\n" 7892 "fast:\n\t" 7893 "DIV $tmp\n" 7894 "done:\n\t" 7895 "MOV EDX,$tmp2\n\t" 7896 "NEG EDX:EAX # if $imm < 0" %} 7897 ins_encode %{ 7898 int con = (int)$imm$$constant; 7899 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7900 int pcon = (con > 0) ? con : -con; 7901 Label Lfast, Lpos, Ldone; 7902 7903 __ movl($tmp$$Register, pcon); 7904 __ xorl($tmp2$$Register,$tmp2$$Register); 7905 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7906 __ jccb(Assembler::above, Lfast); // result fits into 32 bit 7907 7908 __ movl($tmp2$$Register, $dst$$Register); // save 7909 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7910 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7911 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7912 7913 // Negative dividend. 
7914 // convert value to positive to use unsigned division 7915 __ lneg($dst$$Register, $tmp2$$Register); 7916 __ divl($tmp$$Register); 7917 __ xchgl($dst$$Register, $tmp2$$Register); 7918 __ divl($tmp$$Register); 7919 // revert result back to negative 7920 __ lneg($tmp2$$Register, $dst$$Register); 7921 __ jmpb(Ldone); 7922 7923 __ bind(Lpos); 7924 __ divl($tmp$$Register); // Use unsigned division 7925 __ xchgl($dst$$Register, $tmp2$$Register); 7926 // Fallthrow for final divide, tmp2 has 32 bit hi result 7927 7928 __ bind(Lfast); 7929 // fast path: src is positive 7930 __ divl($tmp$$Register); // Use unsigned division 7931 7932 __ bind(Ldone); 7933 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7934 if (con < 0) { 7935 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7936 } 7937 %} 7938 ins_pipe( pipe_slow ); 7939 %} 7940 7941 // Remainder Register Long (remainder fit into 32 bits) 7942 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7943 match(Set dst (ModL dst imm)); 7944 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7945 ins_cost(1000); 7946 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7947 "CMP $tmp,EDX\n\t" 7948 "JA,s fast\n\t" 7949 "MOV $tmp2,EAX\n\t" 7950 "MOV EAX,EDX\n\t" 7951 "MOV EDX,0\n\t" 7952 "JLE,s pos\n\t" 7953 "LNEG EAX : $tmp2\n\t" 7954 "DIV $tmp # unsigned division\n\t" 7955 "MOV EAX,$tmp2\n\t" 7956 "DIV $tmp\n\t" 7957 "NEG EDX\n\t" 7958 "JMP,s done\n" 7959 "pos:\n\t" 7960 "DIV $tmp\n\t" 7961 "MOV EAX,$tmp2\n" 7962 "fast:\n\t" 7963 "DIV $tmp\n" 7964 "done:\n\t" 7965 "MOV EAX,EDX\n\t" 7966 "SAR EDX,31\n\t" %} 7967 ins_encode %{ 7968 int con = (int)$imm$$constant; 7969 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7970 int pcon = (con > 0) ? 
con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    // Remainder is in the hi half; move it low and sign-extend into the hi half.
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
// Shift count must live in ECX (the eCXRegI operand), per the x86 D3 encoding.
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2, matched from the (x ^ -1) & y idiom.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from the (0 - x) & x idiom.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to lowest set bit, matched from the (x - 1) ^ x idiom.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, matched from the (x - 1) & x idiom.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst
(OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or with a pointer value reinterpreted as an integer (CastP2X).
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// Helper instructions used only through the expand rules below (no match rule).
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Only matches when the two shift counts sum to 0 mod 32 (a true rotate).
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Only matches when the two shift counts sum to 0 mod 32 (a true rotate).
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift,
immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// XOR with -1 is a bitwise complement; NOT does not touch the flags, so no KILL cr.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06); /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6); /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Plain register copy; used only via the convI2B expand below (no match rule).
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG sets CF unless dst is 0; ADC then folds the carry back in so the
// result is non-zero exactly when src is non-zero.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer flavor of the same Conv2B expansion.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// dst = (p < q) ? -1 : 0
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
"CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // Straight-line sequence: materialize (p < q) as 0/1 in the low byte,
    // then negate to get the 0/-1 mask. (Removed an unused "Label done;"
    // left over from an earlier branching implementation.)
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// dst = (dst < 0) ? -1 : 0, i.e. arithmetic shift by 31 smears the sign bit.
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// p = (p - q) + ((p < q) ? y : 0), implemented with a branch instead of a mask.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// y = (p < q) ? y : 0, again via a branch rather than materializing the mask.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------

// These set the flags for Math.*Exact intrinsics; the OF result is consumed
// by a matching overflow branch.
instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// CMP computes op1 - op2 for flags only, so no register is clobbered.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 0 - op2 as NEG, which sets OF when op2 == min_jint.
instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Integer Absolute Instructions
// Branch-free abs: tmp = sign mask (0 or -1); (src ^ tmp) - tmp == |src|.
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
  %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);
    __ subl($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

//----------Long Instructions------------------------------------------------
// Long ops are split into a low-half op plus a carry-propagating high-half op.
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Long negate: 0 - dst.
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode(
neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Long ANDN: applied independently to the low and high 32-bit halves.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // Address of the high 32-bit half of the in-memory long operand.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSI: the lowest set bit is in the low half unless the low half is
// zero (BLSI of the low half sets ZF=0 when it found a bit), in which case
// the high half is processed instead.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSMSK: if the low-half BLSMSK produced a carry (low half was zero),
// extend the mask by processing the high half too.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
9148 "BLSRL $dst.lo, $src.lo\n\t" 9149 "JNC done\n\t" 9150 "BLSRL $dst.hi, $src.hi\n" 9151 "done:" 9152 %} 9153 9154 ins_encode %{ 9155 Label done; 9156 Register Rdst = $dst$$Register; 9157 Register Rsrc = $src$$Register; 9158 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9159 __ blsrl(Rdst, Rsrc); 9160 __ jccb(Assembler::carryClear, done); 9161 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9162 __ bind(done); 9163 %} 9164 9165 ins_pipe(ialu_reg); 9166 %} 9167 9168 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9169 %{ 9170 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); 9171 predicate(UseBMI1Instructions); 9172 effect(KILL cr, TEMP dst); 9173 9174 ins_cost(125); 9175 format %{ "MOVL $dst.hi, $src+4\n\t" 9176 "BLSRL $dst.lo, $src\n\t" 9177 "JNC done\n\t" 9178 "BLSRL $dst.hi, $src+4\n" 9179 "done:" 9180 %} 9181 9182 ins_encode %{ 9183 Label done; 9184 Register Rdst = $dst$$Register; 9185 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9186 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9187 __ blsrl(Rdst, $src$$Address); 9188 __ jccb(Assembler::carryClear, done); 9189 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9190 __ bind(done); 9191 %} 9192 9193 ins_pipe(ialu_reg_mem); 9194 %} 9195 9196 // Or Long Register with Register 9197 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9198 match(Set dst (OrL dst src)); 9199 effect(KILL cr); 9200 format %{ "OR $dst.lo,$src.lo\n\t" 9201 "OR $dst.hi,$src.hi" %} 9202 opcode(0x0B,0x0B); 9203 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9204 ins_pipe( ialu_reg_reg_long ); 9205 %} 9206 9207 // Or Long Register with Immediate 9208 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9209 match(Set dst (OrL dst src)); 9210 effect(KILL cr); 9211 format %{ "OR $dst.lo,$src.lo\n\t" 9212 "OR $dst.hi,$src.hi" %} 9213 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9214 ins_encode( Long_OpcSErm_Lo( dst, src ), 
Long_OpcSErm_Hi( dst, src ) ); 9215 ins_pipe( ialu_reg_long ); 9216 %} 9217 9218 // Or Long Register with Memory 9219 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9220 match(Set dst (OrL dst (LoadL mem))); 9221 effect(KILL cr); 9222 ins_cost(125); 9223 format %{ "OR $dst.lo,$mem\n\t" 9224 "OR $dst.hi,$mem+4" %} 9225 opcode(0x0B,0x0B); 9226 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9227 ins_pipe( ialu_reg_long_mem ); 9228 %} 9229 9230 // Xor Long Register with Register 9231 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9232 match(Set dst (XorL dst src)); 9233 effect(KILL cr); 9234 format %{ "XOR $dst.lo,$src.lo\n\t" 9235 "XOR $dst.hi,$src.hi" %} 9236 opcode(0x33,0x33); 9237 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9238 ins_pipe( ialu_reg_reg_long ); 9239 %} 9240 9241 // Xor Long Register with Immediate -1 9242 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9243 match(Set dst (XorL dst imm)); 9244 format %{ "NOT $dst.lo\n\t" 9245 "NOT $dst.hi" %} 9246 ins_encode %{ 9247 __ notl($dst$$Register); 9248 __ notl(HIGH_FROM_LOW($dst$$Register)); 9249 %} 9250 ins_pipe( ialu_reg_long ); 9251 %} 9252 9253 // Xor Long Register with Immediate 9254 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9255 match(Set dst (XorL dst src)); 9256 effect(KILL cr); 9257 format %{ "XOR $dst.lo,$src.lo\n\t" 9258 "XOR $dst.hi,$src.hi" %} 9259 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9260 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9261 ins_pipe( ialu_reg_long ); 9262 %} 9263 9264 // Xor Long Register with Memory 9265 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9266 match(Set dst (XorL dst (LoadL mem))); 9267 effect(KILL cr); 9268 ins_cost(125); 9269 format %{ "XOR $dst.lo,$mem\n\t" 9270 "XOR $dst.hi,$mem+4" %} 9271 opcode(0x33,0x33); 9272 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9273 ins_pipe( ialu_reg_long_mem ); 
9274 %} 9275 9276 // Shift Left Long by 1 9277 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9278 predicate(UseNewLongLShift); 9279 match(Set dst (LShiftL dst cnt)); 9280 effect(KILL cr); 9281 ins_cost(100); 9282 format %{ "ADD $dst.lo,$dst.lo\n\t" 9283 "ADC $dst.hi,$dst.hi" %} 9284 ins_encode %{ 9285 __ addl($dst$$Register,$dst$$Register); 9286 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9287 %} 9288 ins_pipe( ialu_reg_long ); 9289 %} 9290 9291 // Shift Left Long by 2 9292 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9293 predicate(UseNewLongLShift); 9294 match(Set dst (LShiftL dst cnt)); 9295 effect(KILL cr); 9296 ins_cost(100); 9297 format %{ "ADD $dst.lo,$dst.lo\n\t" 9298 "ADC $dst.hi,$dst.hi\n\t" 9299 "ADD $dst.lo,$dst.lo\n\t" 9300 "ADC $dst.hi,$dst.hi" %} 9301 ins_encode %{ 9302 __ addl($dst$$Register,$dst$$Register); 9303 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9304 __ addl($dst$$Register,$dst$$Register); 9305 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9306 %} 9307 ins_pipe( ialu_reg_long ); 9308 %} 9309 9310 // Shift Left Long by 3 9311 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9312 predicate(UseNewLongLShift); 9313 match(Set dst (LShiftL dst cnt)); 9314 effect(KILL cr); 9315 ins_cost(100); 9316 format %{ "ADD $dst.lo,$dst.lo\n\t" 9317 "ADC $dst.hi,$dst.hi\n\t" 9318 "ADD $dst.lo,$dst.lo\n\t" 9319 "ADC $dst.hi,$dst.hi\n\t" 9320 "ADD $dst.lo,$dst.lo\n\t" 9321 "ADC $dst.hi,$dst.hi" %} 9322 ins_encode %{ 9323 __ addl($dst$$Register,$dst$$Register); 9324 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9325 __ addl($dst$$Register,$dst$$Register); 9326 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9327 __ addl($dst$$Register,$dst$$Register); 9328 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9329 %} 9330 ins_pipe( ialu_reg_long ); 9331 %} 9332 9333 // Shift Left 
Long by 1-31 9334 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9335 match(Set dst (LShiftL dst cnt)); 9336 effect(KILL cr); 9337 ins_cost(200); 9338 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9339 "SHL $dst.lo,$cnt" %} 9340 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9341 ins_encode( move_long_small_shift(dst,cnt) ); 9342 ins_pipe( ialu_reg_long ); 9343 %} 9344 9345 // Shift Left Long by 32-63 9346 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9347 match(Set dst (LShiftL dst cnt)); 9348 effect(KILL cr); 9349 ins_cost(300); 9350 format %{ "MOV $dst.hi,$dst.lo\n" 9351 "\tSHL $dst.hi,$cnt-32\n" 9352 "\tXOR $dst.lo,$dst.lo" %} 9353 opcode(0xC1, 0x4); /* C1 /4 ib */ 9354 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9355 ins_pipe( ialu_reg_long ); 9356 %} 9357 9358 // Shift Left Long by variable 9359 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9360 match(Set dst (LShiftL dst shift)); 9361 effect(KILL cr); 9362 ins_cost(500+200); 9363 size(17); 9364 format %{ "TEST $shift,32\n\t" 9365 "JEQ,s small\n\t" 9366 "MOV $dst.hi,$dst.lo\n\t" 9367 "XOR $dst.lo,$dst.lo\n" 9368 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9369 "SHL $dst.lo,$shift" %} 9370 ins_encode( shift_left_long( dst, shift ) ); 9371 ins_pipe( pipe_slow ); 9372 %} 9373 9374 // Shift Right Long by 1-31 9375 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9376 match(Set dst (URShiftL dst cnt)); 9377 effect(KILL cr); 9378 ins_cost(200); 9379 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9380 "SHR $dst.hi,$cnt" %} 9381 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9382 ins_encode( move_long_small_shift(dst,cnt) ); 9383 ins_pipe( ialu_reg_long ); 9384 %} 9385 9386 // Shift Right Long by 32-63 9387 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9388 match(Set dst (URShiftL dst cnt)); 9389 effect(KILL cr); 9390 ins_cost(300); 9391 format %{ "MOV $dst.lo,$dst.hi\n" 9392 "\tSHR $dst.lo,$cnt-32\n" 9393 "\tXOR 
$dst.hi,$dst.hi" %} 9394 opcode(0xC1, 0x5); /* C1 /5 ib */ 9395 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9396 ins_pipe( ialu_reg_long ); 9397 %} 9398 9399 // Shift Right Long by variable 9400 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9401 match(Set dst (URShiftL dst shift)); 9402 effect(KILL cr); 9403 ins_cost(600); 9404 size(17); 9405 format %{ "TEST $shift,32\n\t" 9406 "JEQ,s small\n\t" 9407 "MOV $dst.lo,$dst.hi\n\t" 9408 "XOR $dst.hi,$dst.hi\n" 9409 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9410 "SHR $dst.hi,$shift" %} 9411 ins_encode( shift_right_long( dst, shift ) ); 9412 ins_pipe( pipe_slow ); 9413 %} 9414 9415 // Shift Right Long by 1-31 9416 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9417 match(Set dst (RShiftL dst cnt)); 9418 effect(KILL cr); 9419 ins_cost(200); 9420 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9421 "SAR $dst.hi,$cnt" %} 9422 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9423 ins_encode( move_long_small_shift(dst,cnt) ); 9424 ins_pipe( ialu_reg_long ); 9425 %} 9426 9427 // Shift Right Long by 32-63 9428 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9429 match(Set dst (RShiftL dst cnt)); 9430 effect(KILL cr); 9431 ins_cost(300); 9432 format %{ "MOV $dst.lo,$dst.hi\n" 9433 "\tSAR $dst.lo,$cnt-32\n" 9434 "\tSAR $dst.hi,31" %} 9435 opcode(0xC1, 0x7); /* C1 /7 ib */ 9436 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9437 ins_pipe( ialu_reg_long ); 9438 %} 9439 9440 // Shift Right arithmetic Long by variable 9441 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9442 match(Set dst (RShiftL dst shift)); 9443 effect(KILL cr); 9444 ins_cost(600); 9445 size(18); 9446 format %{ "TEST $shift,32\n\t" 9447 "JEQ,s small\n\t" 9448 "MOV $dst.lo,$dst.hi\n\t" 9449 "SAR $dst.hi,31\n" 9450 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9451 "SAR $dst.hi,$shift" %} 9452 ins_encode( shift_right_arith_long( dst, shift ) ); 9453 ins_pipe( pipe_slow ); 9454 %} 9455 9456 
9457 //----------Double Instructions------------------------------------------------ 9458 // Double Math 9459 9460 // Compare & branch 9461 9462 // P6 version of float compare, sets condition codes in EFLAGS 9463 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9464 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9465 match(Set cr (CmpD src1 src2)); 9466 effect(KILL rax); 9467 ins_cost(150); 9468 format %{ "FLD $src1\n\t" 9469 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9470 "JNP exit\n\t" 9471 "MOV ah,1 // saw a NaN, set CF\n\t" 9472 "SAHF\n" 9473 "exit:\tNOP // avoid branch to branch" %} 9474 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9475 ins_encode( Push_Reg_DPR(src1), 9476 OpcP, RegOpc(src2), 9477 cmpF_P6_fixup ); 9478 ins_pipe( pipe_slow ); 9479 %} 9480 9481 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9482 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9483 match(Set cr (CmpD src1 src2)); 9484 ins_cost(150); 9485 format %{ "FLD $src1\n\t" 9486 "FUCOMIP ST,$src2 // P6 instruction" %} 9487 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9488 ins_encode( Push_Reg_DPR(src1), 9489 OpcP, RegOpc(src2)); 9490 ins_pipe( pipe_slow ); 9491 %} 9492 9493 // Compare & branch 9494 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9495 predicate(UseSSE<=1); 9496 match(Set cr (CmpD src1 src2)); 9497 effect(KILL rax); 9498 ins_cost(200); 9499 format %{ "FLD $src1\n\t" 9500 "FCOMp $src2\n\t" 9501 "FNSTSW AX\n\t" 9502 "TEST AX,0x400\n\t" 9503 "JZ,s flags\n\t" 9504 "MOV AH,1\t# unordered treat as LT\n" 9505 "flags:\tSAHF" %} 9506 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9507 ins_encode( Push_Reg_DPR(src1), 9508 OpcP, RegOpc(src2), 9509 fpu_flags); 9510 ins_pipe( pipe_slow ); 9511 %} 9512 9513 // Compare vs zero into -1,0,1 9514 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9515 predicate(UseSSE<=1); 9516 match(Set dst (CmpD3 src1 zero)); 9517 effect(KILL 
cr, KILL rax); 9518 ins_cost(280); 9519 format %{ "FTSTD $dst,$src1" %} 9520 opcode(0xE4, 0xD9); 9521 ins_encode( Push_Reg_DPR(src1), 9522 OpcS, OpcP, PopFPU, 9523 CmpF_Result(dst)); 9524 ins_pipe( pipe_slow ); 9525 %} 9526 9527 // Compare into -1,0,1 9528 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9529 predicate(UseSSE<=1); 9530 match(Set dst (CmpD3 src1 src2)); 9531 effect(KILL cr, KILL rax); 9532 ins_cost(300); 9533 format %{ "FCMPD $dst,$src1,$src2" %} 9534 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9535 ins_encode( Push_Reg_DPR(src1), 9536 OpcP, RegOpc(src2), 9537 CmpF_Result(dst)); 9538 ins_pipe( pipe_slow ); 9539 %} 9540 9541 // float compare and set condition codes in EFLAGS by XMM regs 9542 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9543 predicate(UseSSE>=2); 9544 match(Set cr (CmpD src1 src2)); 9545 ins_cost(145); 9546 format %{ "UCOMISD $src1,$src2\n\t" 9547 "JNP,s exit\n\t" 9548 "PUSHF\t# saw NaN, set CF\n\t" 9549 "AND [rsp], #0xffffff2b\n\t" 9550 "POPF\n" 9551 "exit:" %} 9552 ins_encode %{ 9553 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9554 emit_cmpfp_fixup(_masm); 9555 %} 9556 ins_pipe( pipe_slow ); 9557 %} 9558 9559 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9560 predicate(UseSSE>=2); 9561 match(Set cr (CmpD src1 src2)); 9562 ins_cost(100); 9563 format %{ "UCOMISD $src1,$src2" %} 9564 ins_encode %{ 9565 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9566 %} 9567 ins_pipe( pipe_slow ); 9568 %} 9569 9570 // float compare and set condition codes in EFLAGS by XMM regs 9571 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9572 predicate(UseSSE>=2); 9573 match(Set cr (CmpD src1 (LoadD src2))); 9574 ins_cost(145); 9575 format %{ "UCOMISD $src1,$src2\n\t" 9576 "JNP,s exit\n\t" 9577 "PUSHF\t# saw NaN, set CF\n\t" 9578 "AND [rsp], #0xffffff2b\n\t" 9579 "POPF\n" 9580 "exit:" %} 9581 ins_encode %{ 9582 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9583 
emit_cmpfp_fixup(_masm); 9584 %} 9585 ins_pipe( pipe_slow ); 9586 %} 9587 9588 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9589 predicate(UseSSE>=2); 9590 match(Set cr (CmpD src1 (LoadD src2))); 9591 ins_cost(100); 9592 format %{ "UCOMISD $src1,$src2" %} 9593 ins_encode %{ 9594 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9595 %} 9596 ins_pipe( pipe_slow ); 9597 %} 9598 9599 // Compare into -1,0,1 in XMM 9600 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9601 predicate(UseSSE>=2); 9602 match(Set dst (CmpD3 src1 src2)); 9603 effect(KILL cr); 9604 ins_cost(255); 9605 format %{ "UCOMISD $src1, $src2\n\t" 9606 "MOV $dst, #-1\n\t" 9607 "JP,s done\n\t" 9608 "JB,s done\n\t" 9609 "SETNE $dst\n\t" 9610 "MOVZB $dst, $dst\n" 9611 "done:" %} 9612 ins_encode %{ 9613 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9614 emit_cmpfp3(_masm, $dst$$Register); 9615 %} 9616 ins_pipe( pipe_slow ); 9617 %} 9618 9619 // Compare into -1,0,1 in XMM and memory 9620 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9621 predicate(UseSSE>=2); 9622 match(Set dst (CmpD3 src1 (LoadD src2))); 9623 effect(KILL cr); 9624 ins_cost(275); 9625 format %{ "UCOMISD $src1, $src2\n\t" 9626 "MOV $dst, #-1\n\t" 9627 "JP,s done\n\t" 9628 "JB,s done\n\t" 9629 "SETNE $dst\n\t" 9630 "MOVZB $dst, $dst\n" 9631 "done:" %} 9632 ins_encode %{ 9633 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9634 emit_cmpfp3(_masm, $dst$$Register); 9635 %} 9636 ins_pipe( pipe_slow ); 9637 %} 9638 9639 9640 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9641 predicate (UseSSE <=1); 9642 match(Set dst (SubD dst src)); 9643 9644 format %{ "FLD $src\n\t" 9645 "DSUBp $dst,ST" %} 9646 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9647 ins_cost(150); 9648 ins_encode( Push_Reg_DPR(src), 9649 OpcP, RegOpc(dst) ); 9650 ins_pipe( fpu_reg_reg ); 9651 %} 9652 9653 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9654 predicate (UseSSE <=1); 9655 
match(Set dst (RoundDouble (SubD src1 src2))); 9656 ins_cost(250); 9657 9658 format %{ "FLD $src2\n\t" 9659 "DSUB ST,$src1\n\t" 9660 "FSTP_D $dst\t# D-round" %} 9661 opcode(0xD8, 0x5); 9662 ins_encode( Push_Reg_DPR(src2), 9663 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9664 ins_pipe( fpu_mem_reg_reg ); 9665 %} 9666 9667 9668 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9669 predicate (UseSSE <=1); 9670 match(Set dst (SubD dst (LoadD src))); 9671 ins_cost(150); 9672 9673 format %{ "FLD $src\n\t" 9674 "DSUBp $dst,ST" %} 9675 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9676 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9677 OpcP, RegOpc(dst) ); 9678 ins_pipe( fpu_reg_mem ); 9679 %} 9680 9681 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9682 predicate (UseSSE<=1); 9683 match(Set dst (AbsD src)); 9684 ins_cost(100); 9685 format %{ "FABS" %} 9686 opcode(0xE1, 0xD9); 9687 ins_encode( OpcS, OpcP ); 9688 ins_pipe( fpu_reg_reg ); 9689 %} 9690 9691 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9692 predicate(UseSSE<=1); 9693 match(Set dst (NegD src)); 9694 ins_cost(100); 9695 format %{ "FCHS" %} 9696 opcode(0xE0, 0xD9); 9697 ins_encode( OpcS, OpcP ); 9698 ins_pipe( fpu_reg_reg ); 9699 %} 9700 9701 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9702 predicate(UseSSE<=1); 9703 match(Set dst (AddD dst src)); 9704 format %{ "FLD $src\n\t" 9705 "DADD $dst,ST" %} 9706 size(4); 9707 ins_cost(150); 9708 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9709 ins_encode( Push_Reg_DPR(src), 9710 OpcP, RegOpc(dst) ); 9711 ins_pipe( fpu_reg_reg ); 9712 %} 9713 9714 9715 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9716 predicate(UseSSE<=1); 9717 match(Set dst (RoundDouble (AddD src1 src2))); 9718 ins_cost(250); 9719 9720 format %{ "FLD $src2\n\t" 9721 "DADD ST,$src1\n\t" 9722 "FSTP_D $dst\t# D-round" %} 9723 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9724 ins_encode( Push_Reg_DPR(src2), 9725 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9726 ins_pipe( 
fpu_mem_reg_reg ); 9727 %} 9728 9729 9730 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9731 predicate(UseSSE<=1); 9732 match(Set dst (AddD dst (LoadD src))); 9733 ins_cost(150); 9734 9735 format %{ "FLD $src\n\t" 9736 "DADDp $dst,ST" %} 9737 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9738 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9739 OpcP, RegOpc(dst) ); 9740 ins_pipe( fpu_reg_mem ); 9741 %} 9742 9743 // add-to-memory 9744 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9745 predicate(UseSSE<=1); 9746 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9747 ins_cost(150); 9748 9749 format %{ "FLD_D $dst\n\t" 9750 "DADD ST,$src\n\t" 9751 "FST_D $dst" %} 9752 opcode(0xDD, 0x0); 9753 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9754 Opcode(0xD8), RegOpc(src), 9755 set_instruction_start, 9756 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9757 ins_pipe( fpu_reg_mem ); 9758 %} 9759 9760 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9761 predicate(UseSSE<=1); 9762 match(Set dst (AddD dst con)); 9763 ins_cost(125); 9764 format %{ "FLD1\n\t" 9765 "DADDp $dst,ST" %} 9766 ins_encode %{ 9767 __ fld1(); 9768 __ faddp($dst$$reg); 9769 %} 9770 ins_pipe(fpu_reg); 9771 %} 9772 9773 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9774 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9775 match(Set dst (AddD dst con)); 9776 ins_cost(200); 9777 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9778 "DADDp $dst,ST" %} 9779 ins_encode %{ 9780 __ fld_d($constantaddress($con)); 9781 __ faddp($dst$$reg); 9782 %} 9783 ins_pipe(fpu_reg_mem); 9784 %} 9785 9786 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9787 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9788 match(Set dst (RoundDouble (AddD src con))); 9789 ins_cost(200); 9790 format %{ "FLD_D [$constantaddress]\t# load from 
constant table: double=$con\n\t" 9791 "DADD ST,$src\n\t" 9792 "FSTP_D $dst\t# D-round" %} 9793 ins_encode %{ 9794 __ fld_d($constantaddress($con)); 9795 __ fadd($src$$reg); 9796 __ fstp_d(Address(rsp, $dst$$disp)); 9797 %} 9798 ins_pipe(fpu_mem_reg_con); 9799 %} 9800 9801 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9802 predicate(UseSSE<=1); 9803 match(Set dst (MulD dst src)); 9804 format %{ "FLD $src\n\t" 9805 "DMULp $dst,ST" %} 9806 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9807 ins_cost(150); 9808 ins_encode( Push_Reg_DPR(src), 9809 OpcP, RegOpc(dst) ); 9810 ins_pipe( fpu_reg_reg ); 9811 %} 9812 9813 // Strict FP instruction biases argument before multiply then 9814 // biases result to avoid double rounding of subnormals. 9815 // 9816 // scale arg1 by multiplying arg1 by 2^(-15360) 9817 // load arg2 9818 // multiply scaled arg1 by arg2 9819 // rescale product by 2^(15360) 9820 // 9821 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9822 predicate( UseSSE<=1 && Compile::current()->has_method() ); 9823 match(Set dst (MulD dst src)); 9824 ins_cost(1); // Select this instruction for all FP double multiplies 9825 9826 format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t" 9827 "DMULp $dst,ST\n\t" 9828 "FLD $src\n\t" 9829 "DMULp $dst,ST\n\t" 9830 "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t" 9831 "DMULp $dst,ST\n\t" %} 9832 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9833 ins_encode( strictfp_bias1(dst), 9834 Push_Reg_DPR(src), 9835 OpcP, RegOpc(dst), 9836 strictfp_bias2(dst) ); 9837 ins_pipe( fpu_reg_reg ); 9838 %} 9839 9840 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9841 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9842 match(Set dst (MulD dst con)); 9843 ins_cost(200); 9844 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9845 "DMULp $dst,ST" %} 9846 ins_encode %{ 9847 __ fld_d($constantaddress($con)); 9848 __ fmulp($dst$$reg); 9849 %} 9850 
ins_pipe(fpu_reg_mem); 9851 %} 9852 9853 9854 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9855 predicate( UseSSE<=1 ); 9856 match(Set dst (MulD dst (LoadD src))); 9857 ins_cost(200); 9858 format %{ "FLD_D $src\n\t" 9859 "DMULp $dst,ST" %} 9860 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9861 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9862 OpcP, RegOpc(dst) ); 9863 ins_pipe( fpu_reg_mem ); 9864 %} 9865 9866 // 9867 // Cisc-alternate to reg-reg multiply 9868 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ 9869 predicate( UseSSE<=1 ); 9870 match(Set dst (MulD src (LoadD mem))); 9871 ins_cost(250); 9872 format %{ "FLD_D $mem\n\t" 9873 "DMUL ST,$src\n\t" 9874 "FSTP_D $dst" %} 9875 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9876 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9877 OpcReg_FPR(src), 9878 Pop_Reg_DPR(dst) ); 9879 ins_pipe( fpu_reg_reg_mem ); 9880 %} 9881 9882 9883 // MACRO3 -- addDPR a mulDPR 9884 // This instruction is a '2-address' instruction in that the result goes 9885 // back to src2. This eliminates a move from the macro; possibly the 9886 // register allocator will have to add it back (and maybe not). 
// 2-address fused multiply-add: src2 = (src0 * src1) + src2.
// Folding the result back into src2 drops one move from the macro; the
// register allocator may (or may not) have to reinsert it.
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  ins_cost(250);
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0), FMul_ST_reg(src1), FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
// 2-address fused multiply-subtract: src2 = (src0 * src1) - src2,
// realized with FSUBRP (reverse subtract-and-pop, DE E0+i).
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  ins_cost(250);
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


// x87 double divide: push src, then FDIVP pops it into dst (dst = dst / src).
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));
  ins_cost(150);
  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // FIX: this instruct previously declared predicate() twice -- first the
  // plain "UseSSE<=1" and then the stricter line below.  An instruct takes a
  // single predicate; the duplicate was at best redundant (the condition
  // below already implies UseSSE<=1) and at worst an ADLC error.  The single
  // predicate now mirrors the sibling strictfp_mulDPR_reg.
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all FP double divides
               // (was the octal literal 01 -- same value, clearer spelling)

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// x87 double remainder: dst = dst fmod src.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE2 double remainder: spill both XMM operands to the stack, run the
// x87 FPREM loop (iterates until C2 clears), then reload the result into
// an XMM register and restore the FPU stack.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
          "\tMOVSD [ESP+0],$src1\n"
          "\tFLD_D [ESP+0]\n"
          "\tMOVSD [ESP+0],$src0\n"
          "\tFLD_D [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD $dst,[ESP+0]\n"
          "\tADD ESP,8\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

instruct atanDPR_reg(regDPR dst,
regDPR src) %{ 9998 predicate (UseSSE<=1); 9999 match(Set dst(AtanD dst src)); 10000 format %{ "DATA $dst,$src" %} 10001 opcode(0xD9, 0xF3); 10002 ins_encode( Push_Reg_DPR(src), 10003 OpcP, OpcS, RegOpc(dst) ); 10004 ins_pipe( pipe_slow ); 10005 %} 10006 10007 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 10008 predicate (UseSSE>=2); 10009 match(Set dst(AtanD dst src)); 10010 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 10011 format %{ "DATA $dst,$src" %} 10012 opcode(0xD9, 0xF3); 10013 ins_encode( Push_SrcD(src), 10014 OpcP, OpcS, Push_ResultD(dst) ); 10015 ins_pipe( pipe_slow ); 10016 %} 10017 10018 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 10019 predicate (UseSSE<=1); 10020 match(Set dst (SqrtD src)); 10021 format %{ "DSQRT $dst,$src" %} 10022 opcode(0xFA, 0xD9); 10023 ins_encode( Push_Reg_DPR(src), 10024 OpcS, OpcP, Pop_Reg_DPR(dst) ); 10025 ins_pipe( pipe_slow ); 10026 %} 10027 10028 //-------------Float Instructions------------------------------- 10029 // Float Math 10030 10031 // Code for float compare: 10032 // fcompp(); 10033 // fwait(); fnstsw_ax(); 10034 // sahf(); 10035 // movl(dst, unordered_result); 10036 // jcc(Assembler::parity, exit); 10037 // movl(dst, less_result); 10038 // jcc(Assembler::below, exit); 10039 // movl(dst, equal_result); 10040 // jcc(Assembler::equal, exit); 10041 // movl(dst, greater_result); 10042 // exit: 10043 10044 // P6 version of float compare, sets condition codes in EFLAGS 10045 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10046 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10047 match(Set cr (CmpF src1 src2)); 10048 effect(KILL rax); 10049 ins_cost(150); 10050 format %{ "FLD $src1\n\t" 10051 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10052 "JNP exit\n\t" 10053 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10054 "SAHF\n" 10055 "exit:\tNOP // avoid branch to branch" %} 10056 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10057 ins_encode( 
Push_Reg_DPR(src1), 10058 OpcP, RegOpc(src2), 10059 cmpF_P6_fixup ); 10060 ins_pipe( pipe_slow ); 10061 %} 10062 10063 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 10064 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10065 match(Set cr (CmpF src1 src2)); 10066 ins_cost(100); 10067 format %{ "FLD $src1\n\t" 10068 "FUCOMIP ST,$src2 // P6 instruction" %} 10069 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10070 ins_encode( Push_Reg_DPR(src1), 10071 OpcP, RegOpc(src2)); 10072 ins_pipe( pipe_slow ); 10073 %} 10074 10075 10076 // Compare & branch 10077 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10078 predicate(UseSSE == 0); 10079 match(Set cr (CmpF src1 src2)); 10080 effect(KILL rax); 10081 ins_cost(200); 10082 format %{ "FLD $src1\n\t" 10083 "FCOMp $src2\n\t" 10084 "FNSTSW AX\n\t" 10085 "TEST AX,0x400\n\t" 10086 "JZ,s flags\n\t" 10087 "MOV AH,1\t# unordered treat as LT\n" 10088 "flags:\tSAHF" %} 10089 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10090 ins_encode( Push_Reg_DPR(src1), 10091 OpcP, RegOpc(src2), 10092 fpu_flags); 10093 ins_pipe( pipe_slow ); 10094 %} 10095 10096 // Compare vs zero into -1,0,1 10097 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10098 predicate(UseSSE == 0); 10099 match(Set dst (CmpF3 src1 zero)); 10100 effect(KILL cr, KILL rax); 10101 ins_cost(280); 10102 format %{ "FTSTF $dst,$src1" %} 10103 opcode(0xE4, 0xD9); 10104 ins_encode( Push_Reg_DPR(src1), 10105 OpcS, OpcP, PopFPU, 10106 CmpF_Result(dst)); 10107 ins_pipe( pipe_slow ); 10108 %} 10109 10110 // Compare into -1,0,1 10111 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10112 predicate(UseSSE == 0); 10113 match(Set dst (CmpF3 src1 src2)); 10114 effect(KILL cr, KILL rax); 10115 ins_cost(300); 10116 format %{ "FCMPF $dst,$src1,$src2" %} 10117 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10118 ins_encode( Push_Reg_DPR(src1), 10119 OpcP, RegOpc(src2), 10120 
CmpF_Result(dst)); 10121 ins_pipe( pipe_slow ); 10122 %} 10123 10124 // float compare and set condition codes in EFLAGS by XMM regs 10125 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10126 predicate(UseSSE>=1); 10127 match(Set cr (CmpF src1 src2)); 10128 ins_cost(145); 10129 format %{ "UCOMISS $src1,$src2\n\t" 10130 "JNP,s exit\n\t" 10131 "PUSHF\t# saw NaN, set CF\n\t" 10132 "AND [rsp], #0xffffff2b\n\t" 10133 "POPF\n" 10134 "exit:" %} 10135 ins_encode %{ 10136 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10137 emit_cmpfp_fixup(_masm); 10138 %} 10139 ins_pipe( pipe_slow ); 10140 %} 10141 10142 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10143 predicate(UseSSE>=1); 10144 match(Set cr (CmpF src1 src2)); 10145 ins_cost(100); 10146 format %{ "UCOMISS $src1,$src2" %} 10147 ins_encode %{ 10148 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10149 %} 10150 ins_pipe( pipe_slow ); 10151 %} 10152 10153 // float compare and set condition codes in EFLAGS by XMM regs 10154 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10155 predicate(UseSSE>=1); 10156 match(Set cr (CmpF src1 (LoadF src2))); 10157 ins_cost(165); 10158 format %{ "UCOMISS $src1,$src2\n\t" 10159 "JNP,s exit\n\t" 10160 "PUSHF\t# saw NaN, set CF\n\t" 10161 "AND [rsp], #0xffffff2b\n\t" 10162 "POPF\n" 10163 "exit:" %} 10164 ins_encode %{ 10165 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10166 emit_cmpfp_fixup(_masm); 10167 %} 10168 ins_pipe( pipe_slow ); 10169 %} 10170 10171 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10172 predicate(UseSSE>=1); 10173 match(Set cr (CmpF src1 (LoadF src2))); 10174 ins_cost(100); 10175 format %{ "UCOMISS $src1,$src2" %} 10176 ins_encode %{ 10177 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10178 %} 10179 ins_pipe( pipe_slow ); 10180 %} 10181 10182 // Compare into -1,0,1 in XMM 10183 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10184 predicate(UseSSE>=1); 10185 match(Set dst (CmpF3 
src1 src2)); 10186 effect(KILL cr); 10187 ins_cost(255); 10188 format %{ "UCOMISS $src1, $src2\n\t" 10189 "MOV $dst, #-1\n\t" 10190 "JP,s done\n\t" 10191 "JB,s done\n\t" 10192 "SETNE $dst\n\t" 10193 "MOVZB $dst, $dst\n" 10194 "done:" %} 10195 ins_encode %{ 10196 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10197 emit_cmpfp3(_masm, $dst$$Register); 10198 %} 10199 ins_pipe( pipe_slow ); 10200 %} 10201 10202 // Compare into -1,0,1 in XMM and memory 10203 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10204 predicate(UseSSE>=1); 10205 match(Set dst (CmpF3 src1 (LoadF src2))); 10206 effect(KILL cr); 10207 ins_cost(275); 10208 format %{ "UCOMISS $src1, $src2\n\t" 10209 "MOV $dst, #-1\n\t" 10210 "JP,s done\n\t" 10211 "JB,s done\n\t" 10212 "SETNE $dst\n\t" 10213 "MOVZB $dst, $dst\n" 10214 "done:" %} 10215 ins_encode %{ 10216 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10217 emit_cmpfp3(_masm, $dst$$Register); 10218 %} 10219 ins_pipe( pipe_slow ); 10220 %} 10221 10222 // Spill to obtain 24-bit precision 10223 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10224 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10225 match(Set dst (SubF src1 src2)); 10226 10227 format %{ "FSUB $dst,$src1 - $src2" %} 10228 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10229 ins_encode( Push_Reg_FPR(src1), 10230 OpcReg_FPR(src2), 10231 Pop_Mem_FPR(dst) ); 10232 ins_pipe( fpu_mem_reg_reg ); 10233 %} 10234 // 10235 // This instruction does not round to 24-bits 10236 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10237 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10238 match(Set dst (SubF dst src)); 10239 10240 format %{ "FSUB $dst,$src" %} 10241 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10242 ins_encode( Push_Reg_FPR(src), 10243 OpcP, RegOpc(dst) ); 10244 ins_pipe( fpu_reg_reg ); 10245 %} 10246 10247 // Spill to obtain 24-bit precision 10248 instruct addFPR24_reg(stackSlotF dst, 
regFPR src1, regFPR src2) %{ 10249 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10250 match(Set dst (AddF src1 src2)); 10251 10252 format %{ "FADD $dst,$src1,$src2" %} 10253 opcode(0xD8, 0x0); /* D8 C0+i */ 10254 ins_encode( Push_Reg_FPR(src2), 10255 OpcReg_FPR(src1), 10256 Pop_Mem_FPR(dst) ); 10257 ins_pipe( fpu_mem_reg_reg ); 10258 %} 10259 // 10260 // This instruction does not round to 24-bits 10261 instruct addFPR_reg(regFPR dst, regFPR src) %{ 10262 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10263 match(Set dst (AddF dst src)); 10264 10265 format %{ "FLD $src\n\t" 10266 "FADDp $dst,ST" %} 10267 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10268 ins_encode( Push_Reg_FPR(src), 10269 OpcP, RegOpc(dst) ); 10270 ins_pipe( fpu_reg_reg ); 10271 %} 10272 10273 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10274 predicate(UseSSE==0); 10275 match(Set dst (AbsF src)); 10276 ins_cost(100); 10277 format %{ "FABS" %} 10278 opcode(0xE1, 0xD9); 10279 ins_encode( OpcS, OpcP ); 10280 ins_pipe( fpu_reg_reg ); 10281 %} 10282 10283 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10284 predicate(UseSSE==0); 10285 match(Set dst (NegF src)); 10286 ins_cost(100); 10287 format %{ "FCHS" %} 10288 opcode(0xE0, 0xD9); 10289 ins_encode( OpcS, OpcP ); 10290 ins_pipe( fpu_reg_reg ); 10291 %} 10292 10293 // Cisc-alternate to addFPR_reg 10294 // Spill to obtain 24-bit precision 10295 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10296 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10297 match(Set dst (AddF src1 (LoadF src2))); 10298 10299 format %{ "FLD $src2\n\t" 10300 "FADD ST,$src1\n\t" 10301 "FSTP_S $dst" %} 10302 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10303 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10304 OpcReg_FPR(src1), 10305 Pop_Mem_FPR(dst) ); 10306 ins_pipe( fpu_mem_reg_mem ); 10307 %} 10308 // 10309 // Cisc-alternate to addFPR_reg 10310 // This instruction does not 
round to 24-bits 10311 instruct addFPR_reg_mem(regFPR dst, memory src) %{ 10312 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10313 match(Set dst (AddF dst (LoadF src))); 10314 10315 format %{ "FADD $dst,$src" %} 10316 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10317 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10318 OpcP, RegOpc(dst) ); 10319 ins_pipe( fpu_reg_mem ); 10320 %} 10321 10322 // // Following two instructions for _222_mpegaudio 10323 // Spill to obtain 24-bit precision 10324 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 10325 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10326 match(Set dst (AddF src1 src2)); 10327 10328 format %{ "FADD $dst,$src1,$src2" %} 10329 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10330 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10331 OpcReg_FPR(src2), 10332 Pop_Mem_FPR(dst) ); 10333 ins_pipe( fpu_mem_reg_mem ); 10334 %} 10335 10336 // Cisc-spill variant 10337 // Spill to obtain 24-bit precision 10338 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10339 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10340 match(Set dst (AddF src1 (LoadF src2))); 10341 10342 format %{ "FADD $dst,$src1,$src2 cisc" %} 10343 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10344 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10345 set_instruction_start, 10346 OpcP, RMopc_Mem(secondary,src1), 10347 Pop_Mem_FPR(dst) ); 10348 ins_pipe( fpu_mem_mem_mem ); 10349 %} 10350 10351 // Spill to obtain 24-bit precision 10352 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10353 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10354 match(Set dst (AddF src1 src2)); 10355 10356 format %{ "FADD $dst,$src1,$src2" %} 10357 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10358 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10359 
set_instruction_start, 10360 OpcP, RMopc_Mem(secondary,src1), 10361 Pop_Mem_FPR(dst) ); 10362 ins_pipe( fpu_mem_mem_mem ); 10363 %} 10364 10365 10366 // Spill to obtain 24-bit precision 10367 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10368 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10369 match(Set dst (AddF src con)); 10370 format %{ "FLD $src\n\t" 10371 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10372 "FSTP_S $dst" %} 10373 ins_encode %{ 10374 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10375 __ fadd_s($constantaddress($con)); 10376 __ fstp_s(Address(rsp, $dst$$disp)); 10377 %} 10378 ins_pipe(fpu_mem_reg_con); 10379 %} 10380 // 10381 // This instruction does not round to 24-bits 10382 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10383 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10384 match(Set dst (AddF src con)); 10385 format %{ "FLD $src\n\t" 10386 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10387 "FSTP $dst" %} 10388 ins_encode %{ 10389 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10390 __ fadd_s($constantaddress($con)); 10391 __ fstp_d($dst$$reg); 10392 %} 10393 ins_pipe(fpu_reg_reg_con); 10394 %} 10395 10396 // Spill to obtain 24-bit precision 10397 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10398 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10399 match(Set dst (MulF src1 src2)); 10400 10401 format %{ "FLD $src1\n\t" 10402 "FMUL $src2\n\t" 10403 "FSTP_S $dst" %} 10404 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10405 ins_encode( Push_Reg_FPR(src1), 10406 OpcReg_FPR(src2), 10407 Pop_Mem_FPR(dst) ); 10408 ins_pipe( fpu_mem_reg_reg ); 10409 %} 10410 // 10411 // This instruction does not round to 24-bits 10412 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10413 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10414 match(Set dst 
(MulF src1 src2)); 10415 10416 format %{ "FLD $src1\n\t" 10417 "FMUL $src2\n\t" 10418 "FSTP_S $dst" %} 10419 opcode(0xD8, 0x1); /* D8 C8+i */ 10420 ins_encode( Push_Reg_FPR(src2), 10421 OpcReg_FPR(src1), 10422 Pop_Reg_FPR(dst) ); 10423 ins_pipe( fpu_reg_reg_reg ); 10424 %} 10425 10426 10427 // Spill to obtain 24-bit precision 10428 // Cisc-alternate to reg-reg multiply 10429 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10430 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10431 match(Set dst (MulF src1 (LoadF src2))); 10432 10433 format %{ "FLD_S $src2\n\t" 10434 "FMUL $src1\n\t" 10435 "FSTP_S $dst" %} 10436 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10437 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10438 OpcReg_FPR(src1), 10439 Pop_Mem_FPR(dst) ); 10440 ins_pipe( fpu_mem_reg_mem ); 10441 %} 10442 // 10443 // This instruction does not round to 24-bits 10444 // Cisc-alternate to reg-reg multiply 10445 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10446 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10447 match(Set dst (MulF src1 (LoadF src2))); 10448 10449 format %{ "FMUL $dst,$src1,$src2" %} 10450 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10451 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10452 OpcReg_FPR(src1), 10453 Pop_Reg_FPR(dst) ); 10454 ins_pipe( fpu_reg_reg_mem ); 10455 %} 10456 10457 // Spill to obtain 24-bit precision 10458 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10459 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10460 match(Set dst (MulF src1 src2)); 10461 10462 format %{ "FMUL $dst,$src1,$src2" %} 10463 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10464 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10465 set_instruction_start, 10466 OpcP, RMopc_Mem(secondary,src1), 10467 Pop_Mem_FPR(dst) ); 10468 ins_pipe( fpu_mem_mem_mem ); 10469 %} 10470 10471 // 
Spill to obtain 24-bit precision 10472 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10473 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10474 match(Set dst (MulF src con)); 10475 10476 format %{ "FLD $src\n\t" 10477 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10478 "FSTP_S $dst" %} 10479 ins_encode %{ 10480 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10481 __ fmul_s($constantaddress($con)); 10482 __ fstp_s(Address(rsp, $dst$$disp)); 10483 %} 10484 ins_pipe(fpu_mem_reg_con); 10485 %} 10486 // 10487 // This instruction does not round to 24-bits 10488 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10489 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10490 match(Set dst (MulF src con)); 10491 10492 format %{ "FLD $src\n\t" 10493 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10494 "FSTP $dst" %} 10495 ins_encode %{ 10496 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10497 __ fmul_s($constantaddress($con)); 10498 __ fstp_d($dst$$reg); 10499 %} 10500 ins_pipe(fpu_reg_reg_con); 10501 %} 10502 10503 10504 // 10505 // MACRO1 -- subsume unshared load into mulFPR 10506 // This instruction does not round to 24-bits 10507 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 10508 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10509 match(Set dst (MulF (LoadF mem1) src)); 10510 10511 format %{ "FLD $mem1 ===MACRO1===\n\t" 10512 "FMUL ST,$src\n\t" 10513 "FSTP $dst" %} 10514 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10515 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10516 OpcReg_FPR(src), 10517 Pop_Reg_FPR(dst) ); 10518 ins_pipe( fpu_reg_reg_mem ); 10519 %} 10520 // 10521 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load 10522 // This instruction does not round to 24-bits 10523 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 10524 
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10525 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10526 ins_cost(95); 10527 10528 format %{ "FLD $mem1 ===MACRO2===\n\t" 10529 "FMUL ST,$src1 subsume mulFPR left load\n\t" 10530 "FADD ST,$src2\n\t" 10531 "FSTP $dst" %} 10532 opcode(0xD9); /* LoadF D9 /0 */ 10533 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10534 FMul_ST_reg(src1), 10535 FAdd_ST_reg(src2), 10536 Pop_Reg_FPR(dst) ); 10537 ins_pipe( fpu_reg_mem_reg_reg ); 10538 %} 10539 10540 // MACRO3 -- addFPR a mulFPR 10541 // This instruction does not round to 24-bits. It is a '2-address' 10542 // instruction in that the result goes back to src2. This eliminates 10543 // a move from the macro; possibly the register allocator will have 10544 // to add it back (and maybe not). 10545 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ 10546 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10547 match(Set src2 (AddF (MulF src0 src1) src2)); 10548 10549 format %{ "FLD $src0 ===MACRO3===\n\t" 10550 "FMUL ST,$src1\n\t" 10551 "FADDP $src2,ST" %} 10552 opcode(0xD9); /* LoadF D9 /0 */ 10553 ins_encode( Push_Reg_FPR(src0), 10554 FMul_ST_reg(src1), 10555 FAddP_reg_ST(src2) ); 10556 ins_pipe( fpu_reg_reg_reg ); 10557 %} 10558 10559 // MACRO4 -- divFPR subFPR 10560 // This instruction does not round to 24-bits 10561 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ 10562 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10563 match(Set dst (DivF (SubF src2 src1) src3)); 10564 10565 format %{ "FLD $src2 ===MACRO4===\n\t" 10566 "FSUB ST,$src1\n\t" 10567 "FDIV ST,$src3\n\t" 10568 "FSTP $dst" %} 10569 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10570 ins_encode( Push_Reg_FPR(src2), 10571 subFPR_divFPR_encode(src1,src3), 10572 Pop_Reg_FPR(dst) ); 10573 ins_pipe( fpu_reg_reg_reg_reg ); 10574 %} 10575 10576 // Spill to obtain 24-bit precision 10577 instruct divFPR24_reg(stackSlotF 
dst, regFPR src1, regFPR src2) %{ 10578 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10579 match(Set dst (DivF src1 src2)); 10580 10581 format %{ "FDIV $dst,$src1,$src2" %} 10582 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ 10583 ins_encode( Push_Reg_FPR(src1), 10584 OpcReg_FPR(src2), 10585 Pop_Mem_FPR(dst) ); 10586 ins_pipe( fpu_mem_reg_reg ); 10587 %} 10588 // 10589 // This instruction does not round to 24-bits 10590 instruct divFPR_reg(regFPR dst, regFPR src) %{ 10591 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10592 match(Set dst (DivF dst src)); 10593 10594 format %{ "FDIV $dst,$src" %} 10595 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10596 ins_encode( Push_Reg_FPR(src), 10597 OpcP, RegOpc(dst) ); 10598 ins_pipe( fpu_reg_reg ); 10599 %} 10600 10601 10602 // Spill to obtain 24-bit precision 10603 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10604 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10605 match(Set dst (ModF src1 src2)); 10606 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10607 10608 format %{ "FMOD $dst,$src1,$src2" %} 10609 ins_encode( Push_Reg_Mod_DPR(src1, src2), 10610 emitModDPR(), 10611 Push_Result_Mod_DPR(src2), 10612 Pop_Mem_FPR(dst)); 10613 ins_pipe( pipe_slow ); 10614 %} 10615 // 10616 // This instruction does not round to 24-bits 10617 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ 10618 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10619 match(Set dst (ModF dst src)); 10620 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10621 10622 format %{ "FMOD $dst,$src" %} 10623 ins_encode(Push_Reg_Mod_DPR(dst, src), 10624 emitModDPR(), 10625 Push_Result_Mod_DPR(src), 10626 Pop_Reg_FPR(dst)); 10627 ins_pipe( pipe_slow ); 10628 %} 10629 10630 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ 10631 predicate(UseSSE>=1); 10632 
match(Set dst (ModF src0 src1)); 10633 effect(KILL rax, KILL cr); 10634 format %{ "SUB ESP,4\t # FMOD\n" 10635 "\tMOVSS [ESP+0],$src1\n" 10636 "\tFLD_S [ESP+0]\n" 10637 "\tMOVSS [ESP+0],$src0\n" 10638 "\tFLD_S [ESP+0]\n" 10639 "loop:\tFPREM\n" 10640 "\tFWAIT\n" 10641 "\tFNSTSW AX\n" 10642 "\tSAHF\n" 10643 "\tJP loop\n" 10644 "\tFSTP_S [ESP+0]\n" 10645 "\tMOVSS $dst,[ESP+0]\n" 10646 "\tADD ESP,4\n" 10647 "\tFSTP ST0\t # Restore FPU Stack" 10648 %} 10649 ins_cost(250); 10650 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); 10651 ins_pipe( pipe_slow ); 10652 %} 10653 10654 10655 //----------Arithmetic Conversion Instructions--------------------------------- 10656 // The conversions operations are all Alpha sorted. Please keep it that way! 10657 10658 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ 10659 predicate(UseSSE==0); 10660 match(Set dst (RoundFloat src)); 10661 ins_cost(125); 10662 format %{ "FST_S $dst,$src\t# F-round" %} 10663 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 10664 ins_pipe( fpu_mem_reg ); 10665 %} 10666 10667 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ 10668 predicate(UseSSE<=1); 10669 match(Set dst (RoundDouble src)); 10670 ins_cost(125); 10671 format %{ "FST_D $dst,$src\t# D-round" %} 10672 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 10673 ins_pipe( fpu_mem_reg ); 10674 %} 10675 10676 // Force rounding to 24-bit precision and 6-bit exponent 10677 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{ 10678 predicate(UseSSE==0); 10679 match(Set dst (ConvD2F src)); 10680 format %{ "FST_S $dst,$src\t# F-round" %} 10681 expand %{ 10682 roundFloat_mem_reg(dst,src); 10683 %} 10684 %} 10685 10686 // Force rounding to 24-bit precision and 6-bit exponent 10687 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{ 10688 predicate(UseSSE==1); 10689 match(Set dst (ConvD2F src)); 10690 effect( KILL cr ); 10691 format %{ "SUB ESP,4\n\t" 10692 "FST_S [ESP],$src\t# F-round\n\t" 10693 "MOVSS 
$dst,[ESP]\n\t" 10694 "ADD ESP,4" %} 10695 ins_encode %{ 10696 __ subptr(rsp, 4); 10697 if ($src$$reg != FPR1L_enc) { 10698 __ fld_s($src$$reg-1); 10699 __ fstp_s(Address(rsp, 0)); 10700 } else { 10701 __ fst_s(Address(rsp, 0)); 10702 } 10703 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 10704 __ addptr(rsp, 4); 10705 %} 10706 ins_pipe( pipe_slow ); 10707 %} 10708 10709 // Force rounding double precision to single precision 10710 instruct convD2F_reg(regF dst, regD src) %{ 10711 predicate(UseSSE>=2); 10712 match(Set dst (ConvD2F src)); 10713 format %{ "CVTSD2SS $dst,$src\t# F-round" %} 10714 ins_encode %{ 10715 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); 10716 %} 10717 ins_pipe( pipe_slow ); 10718 %} 10719 10720 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ 10721 predicate(UseSSE==0); 10722 match(Set dst (ConvF2D src)); 10723 format %{ "FST_S $dst,$src\t# D-round" %} 10724 ins_encode( Pop_Reg_Reg_DPR(dst, src)); 10725 ins_pipe( fpu_reg_reg ); 10726 %} 10727 10728 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ 10729 predicate(UseSSE==1); 10730 match(Set dst (ConvF2D src)); 10731 format %{ "FST_D $dst,$src\t# D-round" %} 10732 expand %{ 10733 roundDouble_mem_reg(dst,src); 10734 %} 10735 %} 10736 10737 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ 10738 predicate(UseSSE==1); 10739 match(Set dst (ConvF2D src)); 10740 effect( KILL cr ); 10741 format %{ "SUB ESP,4\n\t" 10742 "MOVSS [ESP] $src\n\t" 10743 "FLD_S [ESP]\n\t" 10744 "ADD ESP,4\n\t" 10745 "FSTP $dst\t# D-round" %} 10746 ins_encode %{ 10747 __ subptr(rsp, 4); 10748 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10749 __ fld_s(Address(rsp, 0)); 10750 __ addptr(rsp, 4); 10751 __ fstp_d($dst$$reg); 10752 %} 10753 ins_pipe( pipe_slow ); 10754 %} 10755 10756 instruct convF2D_reg(regD dst, regF src) %{ 10757 predicate(UseSSE>=2); 10758 match(Set dst (ConvF2D src)); 10759 format %{ "CVTSS2SD $dst,$src\t# D-round" %} 10760 ins_encode %{ 10761 __ cvtss2sd ($dst$$XMMRegister, 
$src$$XMMRegister); 10762 %} 10763 ins_pipe( pipe_slow ); 10764 %} 10765 10766 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10767 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{ 10768 predicate(UseSSE<=1); 10769 match(Set dst (ConvD2I src)); 10770 effect( KILL tmp, KILL cr ); 10771 format %{ "FLD $src\t# Convert double to int \n\t" 10772 "FLDCW trunc mode\n\t" 10773 "SUB ESP,4\n\t" 10774 "FISTp [ESP + #0]\n\t" 10775 "FLDCW std/24-bit mode\n\t" 10776 "POP EAX\n\t" 10777 "CMP EAX,0x80000000\n\t" 10778 "JNE,s fast\n\t" 10779 "FLD_D $src\n\t" 10780 "CALL d2i_wrapper\n" 10781 "fast:" %} 10782 ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); 10783 ins_pipe( pipe_slow ); 10784 %} 10785 10786 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10787 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ 10788 predicate(UseSSE>=2); 10789 match(Set dst (ConvD2I src)); 10790 effect( KILL tmp, KILL cr ); 10791 format %{ "CVTTSD2SI $dst, $src\n\t" 10792 "CMP $dst,0x80000000\n\t" 10793 "JNE,s fast\n\t" 10794 "SUB ESP, 8\n\t" 10795 "MOVSD [ESP], $src\n\t" 10796 "FLD_D [ESP]\n\t" 10797 "ADD ESP, 8\n\t" 10798 "CALL d2i_wrapper\n" 10799 "fast:" %} 10800 ins_encode %{ 10801 Label fast; 10802 __ cvttsd2sil($dst$$Register, $src$$XMMRegister); 10803 __ cmpl($dst$$Register, 0x80000000); 10804 __ jccb(Assembler::notEqual, fast); 10805 __ subptr(rsp, 8); 10806 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10807 __ fld_d(Address(rsp, 0)); 10808 __ addptr(rsp, 8); 10809 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper()))); 10810 __ post_call_nop(); 10811 __ bind(fast); 10812 %} 10813 ins_pipe( pipe_slow ); 10814 %} 10815 10816 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ 10817 predicate(UseSSE<=1); 10818 match(Set dst (ConvD2L src)); 10819 effect( KILL cr ); 10820 format %{ "FLD $src\t# Convert double to 
long\n\t" 10821 "FLDCW trunc mode\n\t" 10822 "SUB ESP,8\n\t" 10823 "FISTp [ESP + #0]\n\t" 10824 "FLDCW std/24-bit mode\n\t" 10825 "POP EAX\n\t" 10826 "POP EDX\n\t" 10827 "CMP EDX,0x80000000\n\t" 10828 "JNE,s fast\n\t" 10829 "TEST EAX,EAX\n\t" 10830 "JNE,s fast\n\t" 10831 "FLD $src\n\t" 10832 "CALL d2l_wrapper\n" 10833 "fast:" %} 10834 ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); 10835 ins_pipe( pipe_slow ); 10836 %} 10837 10838 // XMM lacks a float/double->long conversion, so use the old FPU stack. 10839 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ 10840 predicate (UseSSE>=2); 10841 match(Set dst (ConvD2L src)); 10842 effect( KILL cr ); 10843 format %{ "SUB ESP,8\t# Convert double to long\n\t" 10844 "MOVSD [ESP],$src\n\t" 10845 "FLD_D [ESP]\n\t" 10846 "FLDCW trunc mode\n\t" 10847 "FISTp [ESP + #0]\n\t" 10848 "FLDCW std/24-bit mode\n\t" 10849 "POP EAX\n\t" 10850 "POP EDX\n\t" 10851 "CMP EDX,0x80000000\n\t" 10852 "JNE,s fast\n\t" 10853 "TEST EAX,EAX\n\t" 10854 "JNE,s fast\n\t" 10855 "SUB ESP,8\n\t" 10856 "MOVSD [ESP],$src\n\t" 10857 "FLD_D [ESP]\n\t" 10858 "ADD ESP,8\n\t" 10859 "CALL d2l_wrapper\n" 10860 "fast:" %} 10861 ins_encode %{ 10862 Label fast; 10863 __ subptr(rsp, 8); 10864 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10865 __ fld_d(Address(rsp, 0)); 10866 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc())); 10867 __ fistp_d(Address(rsp, 0)); 10868 // Restore the rounding mode, mask the exception 10869 if (Compile::current()->in_24_bit_fp_mode()) { 10870 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 10871 } else { 10872 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 10873 } 10874 // Load the converted long, adjust CPU stack 10875 __ pop(rax); 10876 __ pop(rdx); 10877 __ cmpl(rdx, 0x80000000); 10878 __ jccb(Assembler::notEqual, fast); 10879 __ testl(rax, rax); 10880 __ jccb(Assembler::notEqual, fast); 10881 __ subptr(rsp, 8); 10882 __ movdbl(Address(rsp, 
0), $src$$XMMRegister); 10883 __ fld_d(Address(rsp, 0)); 10884 __ addptr(rsp, 8); 10885 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper()))); 10886 __ post_call_nop(); 10887 __ bind(fast); 10888 %} 10889 ins_pipe( pipe_slow ); 10890 %} 10891 10892 // Convert a double to an int. Java semantics require we do complex 10893 // manglations in the corner cases. So we set the rounding mode to 10894 // 'zero', store the darned double down as an int, and reset the 10895 // rounding mode to 'nearest'. The hardware stores a flag value down 10896 // if we would overflow or converted a NAN; we check for this and 10897 // and go the slow path if needed. 10898 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ 10899 predicate(UseSSE==0); 10900 match(Set dst (ConvF2I src)); 10901 effect( KILL tmp, KILL cr ); 10902 format %{ "FLD $src\t# Convert float to int \n\t" 10903 "FLDCW trunc mode\n\t" 10904 "SUB ESP,4\n\t" 10905 "FISTp [ESP + #0]\n\t" 10906 "FLDCW std/24-bit mode\n\t" 10907 "POP EAX\n\t" 10908 "CMP EAX,0x80000000\n\t" 10909 "JNE,s fast\n\t" 10910 "FLD $src\n\t" 10911 "CALL d2i_wrapper\n" 10912 "fast:" %} 10913 // DPR2I_encoding works for FPR2I 10914 ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); 10915 ins_pipe( pipe_slow ); 10916 %} 10917 10918 // Convert a float in xmm to an int reg. 
10919 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ 10920 predicate(UseSSE>=1); 10921 match(Set dst (ConvF2I src)); 10922 effect( KILL tmp, KILL cr ); 10923 format %{ "CVTTSS2SI $dst, $src\n\t" 10924 "CMP $dst,0x80000000\n\t" 10925 "JNE,s fast\n\t" 10926 "SUB ESP, 4\n\t" 10927 "MOVSS [ESP], $src\n\t" 10928 "FLD [ESP]\n\t" 10929 "ADD ESP, 4\n\t" 10930 "CALL d2i_wrapper\n" 10931 "fast:" %} 10932 ins_encode %{ 10933 Label fast; 10934 __ cvttss2sil($dst$$Register, $src$$XMMRegister); 10935 __ cmpl($dst$$Register, 0x80000000); 10936 __ jccb(Assembler::notEqual, fast); 10937 __ subptr(rsp, 4); 10938 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10939 __ fld_s(Address(rsp, 0)); 10940 __ addptr(rsp, 4); 10941 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper()))); 10942 __ post_call_nop(); 10943 __ bind(fast); 10944 %} 10945 ins_pipe( pipe_slow ); 10946 %} 10947 10948 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ 10949 predicate(UseSSE==0); 10950 match(Set dst (ConvF2L src)); 10951 effect( KILL cr ); 10952 format %{ "FLD $src\t# Convert float to long\n\t" 10953 "FLDCW trunc mode\n\t" 10954 "SUB ESP,8\n\t" 10955 "FISTp [ESP + #0]\n\t" 10956 "FLDCW std/24-bit mode\n\t" 10957 "POP EAX\n\t" 10958 "POP EDX\n\t" 10959 "CMP EDX,0x80000000\n\t" 10960 "JNE,s fast\n\t" 10961 "TEST EAX,EAX\n\t" 10962 "JNE,s fast\n\t" 10963 "FLD $src\n\t" 10964 "CALL d2l_wrapper\n" 10965 "fast:" %} 10966 // DPR2L_encoding works for FPR2L 10967 ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); 10968 ins_pipe( pipe_slow ); 10969 %} 10970 10971 // XMM lacks a float/double->long conversion, so use the old FPU stack. 
10972 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ 10973 predicate (UseSSE>=1); 10974 match(Set dst (ConvF2L src)); 10975 effect( KILL cr ); 10976 format %{ "SUB ESP,8\t# Convert float to long\n\t" 10977 "MOVSS [ESP],$src\n\t" 10978 "FLD_S [ESP]\n\t" 10979 "FLDCW trunc mode\n\t" 10980 "FISTp [ESP + #0]\n\t" 10981 "FLDCW std/24-bit mode\n\t" 10982 "POP EAX\n\t" 10983 "POP EDX\n\t" 10984 "CMP EDX,0x80000000\n\t" 10985 "JNE,s fast\n\t" 10986 "TEST EAX,EAX\n\t" 10987 "JNE,s fast\n\t" 10988 "SUB ESP,4\t# Convert float to long\n\t" 10989 "MOVSS [ESP],$src\n\t" 10990 "FLD_S [ESP]\n\t" 10991 "ADD ESP,4\n\t" 10992 "CALL d2l_wrapper\n" 10993 "fast:" %} 10994 ins_encode %{ 10995 Label fast; 10996 __ subptr(rsp, 8); 10997 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10998 __ fld_s(Address(rsp, 0)); 10999 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc())); 11000 __ fistp_d(Address(rsp, 0)); 11001 // Restore the rounding mode, mask the exception 11002 if (Compile::current()->in_24_bit_fp_mode()) { 11003 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 11004 } else { 11005 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 11006 } 11007 // Load the converted long, adjust CPU stack 11008 __ pop(rax); 11009 __ pop(rdx); 11010 __ cmpl(rdx, 0x80000000); 11011 __ jccb(Assembler::notEqual, fast); 11012 __ testl(rax, rax); 11013 __ jccb(Assembler::notEqual, fast); 11014 __ subptr(rsp, 4); 11015 __ movflt(Address(rsp, 0), $src$$XMMRegister); 11016 __ fld_s(Address(rsp, 0)); 11017 __ addptr(rsp, 4); 11018 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper()))); 11019 __ post_call_nop(); 11020 __ bind(fast); 11021 %} 11022 ins_pipe( pipe_slow ); 11023 %} 11024 11025 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ 11026 predicate( UseSSE<=1 ); 11027 match(Set dst (ConvI2D src)); 11028 format %{ "FILD $src\n\t" 11029 "FSTP $dst" %} 11030 opcode(0xDB, 0x0); /* DB /0 */ 
11031 ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); 11032 ins_pipe( fpu_reg_mem ); 11033 %} 11034 11035 instruct convI2D_reg(regD dst, rRegI src) %{ 11036 predicate( UseSSE>=2 && !UseXmmI2D ); 11037 match(Set dst (ConvI2D src)); 11038 format %{ "CVTSI2SD $dst,$src" %} 11039 ins_encode %{ 11040 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); 11041 %} 11042 ins_pipe( pipe_slow ); 11043 %} 11044 11045 instruct convI2D_mem(regD dst, memory mem) %{ 11046 predicate( UseSSE>=2 ); 11047 match(Set dst (ConvI2D (LoadI mem))); 11048 format %{ "CVTSI2SD $dst,$mem" %} 11049 ins_encode %{ 11050 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); 11051 %} 11052 ins_pipe( pipe_slow ); 11053 %} 11054 11055 instruct convXI2D_reg(regD dst, rRegI src) 11056 %{ 11057 predicate( UseSSE>=2 && UseXmmI2D ); 11058 match(Set dst (ConvI2D src)); 11059 11060 format %{ "MOVD $dst,$src\n\t" 11061 "CVTDQ2PD $dst,$dst\t# i2d" %} 11062 ins_encode %{ 11063 __ movdl($dst$$XMMRegister, $src$$Register); 11064 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); 11065 %} 11066 ins_pipe(pipe_slow); // XXX 11067 %} 11068 11069 instruct convI2DPR_mem(regDPR dst, memory mem) %{ 11070 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); 11071 match(Set dst (ConvI2D (LoadI mem))); 11072 format %{ "FILD $mem\n\t" 11073 "FSTP $dst" %} 11074 opcode(0xDB); /* DB /0 */ 11075 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11076 Pop_Reg_DPR(dst)); 11077 ins_pipe( fpu_reg_mem ); 11078 %} 11079 11080 // Convert a byte to a float; no rounding step needed. 
// Matches ConvI2F whose input is (AndI x 255), i.e. a value already known to
// fit in a byte, so no precision-rounding store is required even in 24-bit mode.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  // Parenthesized for clarity; same precedence as the original expression.
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Int to float using MOVD + CVTDQ2PS, selected by the UseXmmI2F flag.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long (register pair): copy low word, arithmetic
// shift replicates the sign bit into the high word.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Long to double on the x87 stack: push the 64-bit value, FILD it, and
// round through a memory store (D-round) into the stack slot result.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Long to double in an XMM register: convert via x87, bounce the result
// through the stack into the XMM destination.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Long to float in an XMM register, same bounce-through-stack scheme.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Long to float on the x87 stack, rounded through an F-size memory store.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Long to int is just a copy of the low word of the register pair.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Raw-bits moves between int/long and float/double representations.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Register-to-register bit move, cheapest variant (no memory traffic).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Double bits to long: two 32-bit loads fill the register pair.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Double bits to long without memory: MOVD the low half, shuffle the high
// half of the XMM into lane 0 via a temp, then MOVD it into dst.hi.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Long bits to double without memory: MOVD each half and interleave.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//----------------------------- CompressBits/ExpandBits ------------------------

// 64-bit PEXT emulated with two 32-bit pextl ops over the register pairs,
// then the two partial results are merged.
instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src mask));
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    Label exit, partial_result;
    // In parallel, extract both upper and lower 32 bits of source into destination register pair.
    // Merge the results of upper and lower destination registers such that upper destination
    // results are contiguously laid out after the lower destination result.
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
    __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
    __ popcntl($rtmp$$Register, $mask$$Register);
    // Skip merging if bit count of lower mask register is equal to 32 (register size).
    __ cmpl($rtmp$$Register, 32);
    __ jccb(Assembler::equal, exit);
    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift left the contents of upper destination register by true bit count of lower mask register
    // and merge with lower destination register.
    __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ orl($dst$$Register, $rtmp$$Register);
    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    // Zero out upper destination register if true bit count of lower 32 bit mask is zero
    // since contents of upper destination have already been copied to lower destination
    // register.
    __ cmpl($rtmp$$Register, 0);
    __ jccb(Assembler::greater, partial_result);
    __ movl(HIGH_FROM_LOW($dst$$Register), 0);
    __ jmp(exit);
    __ bind(partial_result);
    // Perform right shift over upper destination register to move out bits already copied
    // to lower destination register.
    __ subl($rtmp$$Register, 32);
    __ negl($rtmp$$Register);
    __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ bind(exit);
  %}
  ins_pipe( pipe_slow );
%}

// 64-bit PDEP emulated with 32-bit pdepl ops over the register pairs.
instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src mask));
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    // Expansion sequentially reads the bits from source register starting from LSB
    // and lays them out into destination register at bit locations corresponding to true bits
    // in mask register. Thus the number of source bits read is equal to combined true bit count
    // of mask register pair.
    Label exit, mask_clipping;
    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
    __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
    __ popcntl($rtmp$$Register, $mask$$Register);
    // If true bit count of lower mask register is 32 then no bits of lower source register
    // will feed to upper destination register.
    __ cmpl($rtmp$$Register, 32);
    __ jccb(Assembler::equal, exit);
    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift right the contents of lower source register to remove already consumed bits.
    __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
    // Extract the bits from lower source register starting from LSB under the influence
    // of upper mask register.
    __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    __ subl($rtmp$$Register, 32);
    __ negl($rtmp$$Register);
    __ movdl($xtmp$$XMMRegister, $mask$$Register);
    __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
    // Clear the set bits in upper mask register which have been used to extract the contents
    // from lower source register.
    __ bind(mask_clipping);
    __ blsrl($mask$$Register, $mask$$Register);
    __ decrementl($rtmp$$Register, 1);
    __ jccb(Assembler::greater, mask_clipping);
    // Starting from LSB extract the bits from upper source register under the influence of
    // remaining set bits in upper mask register.
    __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
    // Merge the partial results extracted from lower and upper source register bits.
    __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ movdl($mask$$Register, $xtmp$$XMMRegister);
    __ bind(exit);
  %}
  ins_pipe( pipe_slow );
%}

// =======================================================================
// fast clearing of an array
// Small ClearArray non-AVX512.
// Small (non-large, non-constant-length) ClearArray for UseAVX <= 2.
// Emits MacroAssembler::clear_mem with is_large = false and no mask register.
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  // $$template: listing text varies with the flags chosen at code-emit time.
  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Small ClearArray AVX512 non-constant length.
// Same as rep_stos but supplies an opmask temp register for the AVX-512 path.
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Large ClearArray non-AVX512.
// clear_mem is called with is_large = true; no short-length fast path.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Large ClearArray AVX512.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Small ClearArray AVX512 constant length.
// Uses the clear_mem overload taking an immediate count (no is_large flag).
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
            ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// String compare, both operands Latin-1 (byte[]), no AVX-512 masking.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Latin-1/Latin-1 compare with an AVX-512 opmask temp register.
instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, both operands UTF-16 (char[]).
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, Latin-1 first operand vs UTF-16 second operand.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// UL compare: note the operands are passed to string_compare in swapped
// order (str2/cnt2 first) relative to the match rule.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // arrays_equals with is_array_equ = false and is_char = false.
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe( pipe_slow );
%}

instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}


// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
11989 __ string_indexof($str1$$Register, $str2$$Register, 11990 $cnt1$$Register, $cnt2$$Register, 11991 icnt2, $result$$Register, 11992 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11993 } 11994 %} 11995 ins_pipe( pipe_slow ); 11996 %} 11997 11998 // fast search of substring with known size. 11999 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 12000 eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 12001 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 12002 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 12003 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 12004 12005 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %} 12006 ins_encode %{ 12007 int icnt2 = (int)$int_cnt2$$constant; 12008 if (icnt2 >= 8) { 12009 // IndexOf for constant substrings with size >= 8 elements 12010 // which don't need to be loaded through stack. 12011 __ string_indexofC8($str1$$Register, $str2$$Register, 12012 $cnt1$$Register, $cnt2$$Register, 12013 icnt2, $result$$Register, 12014 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 12015 } else { 12016 // Small strings are loaded through stack if they cross page boundary. 12017 __ string_indexof($str1$$Register, $str2$$Register, 12018 $cnt1$$Register, $cnt2$$Register, 12019 icnt2, $result$$Register, 12020 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 12021 } 12022 %} 12023 ins_pipe( pipe_slow ); 12024 %} 12025 12026 // fast search of substring with known size. 
// UTF-16 haystack / Latin1 needle (UL) variant with constant needle length.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Substring search with runtime (non-constant) needle length; the literal
// (-1) passed below tells the masm routine the length is in cnt2.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// Runtime-length IndexOf, UTF-16/UTF-16.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// Runtime-length IndexOf, UTF-16 haystack / Latin1 needle.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Find a single char in a UTF-16 string.
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                             eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Find a single char in a Latin1 string.
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// fast array equals
// byte[] (LL encoding), non-EVEX path. First arg true: array mode, so the
// masm routine loads the lengths itself (tmp3/tmp4 are scratch).
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// byte[] array equals, EVEX path with TEMP opmask register.
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// char[] (UU encoding) array equals, non-EVEX path.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// char[] array equals, EVEX path with TEMP opmask register.
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Count leading non-negative bytes; non-EVEX path (note this predicate also
// requires !bmi2, complementing the _evex variant's avx512vlbw && bmi2).
instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                         regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX variant: needs both AVX-512 VL+BW and BMI2, and two opmask temps.
instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
                              regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}


// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                         regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Compression, EVEX path (AVX-512 VL+BW and BMI2) with two opmask temps.
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                              regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Inflation, EVEX path with one opmask temp.
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                             regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    // Final 'false' selects ISO-8859-1 (not ASCII) mode in the masm helper.
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ASCII
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                            regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                            eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    // Final 'true' selects ASCII mode.
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
// Signed int compare: sets eFlagsReg from op1 - op2.
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Signed int compare against an immediate (8- or 32-bit form chosen by Con8or32).
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero: TEST reg,reg is shorter than CMP reg,0.
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & con) == 0 folded into a single TEST reg,imm.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & mem) == 0 folded into a single TEST reg,mem.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  // Predicate: the loaded pointer must need no relocation (i.e. a raw pointer).
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7); /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    // Stride of +/-1 is handled elsewhere; the division below assumes |strd| > 1.
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): m1 appears unused in this encoding — looks like a leftover.
    int m1 = (strd > 0) ? 1 : -1;
    // The math below is done in 64-bit (register pairs) so the intermediate
    // $limit - $init + $stride - 1 cannot overflow 32 bits.
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      // Negative stride: bias toward zero and divide by |stride|.
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index), noreg);
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);  // fixed 5-byte encoding: opcode + rel32
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);  // fixed 6-byte encoding: 0F 8x + rel32
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Conditional jump on flags from an unordered-compare (UCF) — lower cost
// than jmpConUCF2 below since it emits a single branch.
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// eq/ne on unordered-compare flags needs an extra parity check, since the
// unordered result sets PF; see the two-branch sequences below.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // ne: unordered (parity set) also counts as not-equal.
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // eq: skip the equal-branch when unordered.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100); // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx,
eDIRegP result, immP0 zero ) %{ 12785 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); 12786 effect( KILL rcx, KILL result ); 12787 12788 ins_cost(1000); 12789 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12790 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12791 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12792 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12793 "JNE,s miss\t\t# Missed: flags NZ\n\t" 12794 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t" 12795 "miss:\t" %} 12796 12797 opcode(0x0); // No need to XOR EDI 12798 ins_encode( enc_PartialSubtypeCheck() ); 12799 ins_pipe( pipe_slow ); 12800 %} 12801 12802 // ============================================================================ 12803 // Branch Instructions -- short offset versions 12804 // 12805 // These instructions are used to replace jumps of a long offset (the default 12806 // match) with jumps of a shorter offset. These instructions are all tagged 12807 // with the ins_short_branch attribute, which causes the ADLC to suppress the 12808 // match rules in general matching. Instead, the ADLC generates a conversion 12809 // method in the MachNode which can be used to do in-place replacement of the 12810 // long variant with the shorter variant. The compiler will determine if a 12811 // branch can be taken by the is_short_branch_offset() predicate in the machine 12812 // specific code section of the file. 
// Jump Direct - Label defines a relative address from JMP+1
// Short (2-byte, rel8) form of jmpDir; selected via ins_short_branch(1).
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short form of jmpConUCF (unordered-FP-compare flags).
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short form of jmpConUCF2: two 2-byte branches handling the parity flag
// for eq/ne tests on unordered-FP-compare flags.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers.  Comparing such values efficiently
// is tricky.  The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test.  The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.
//
// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code.  The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf.  One fix is to not match both
// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.
// Produces -1/0/+1 in dst: compare high halves signed, then (only if equal)
// low halves unsigned, branching to set the result.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    // Low halves compare unsigned once the high halves are equal.
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Sign test on the high half alone decides LT/GE vs zero.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Two CMOVs move both halves of the 64-bit value.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags variant: delegates to the signed-register encoding, since
// lt/ge condition codes on the manifested flags are the same.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LTGE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): the predicate below parses as (UseSSE<=1 && lt) || ge due to
// &&/|| precedence — unlike the integer cmov forms above which parenthesize
// the lt||ge disjunction.  Confirm whether this is intentional.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// OR of the two halves is zero iff the whole long is zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Two CMOVs move both halves of the 64-bit value.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags variants delegate to the signed encodings: eq/ne condition
// codes are the same either way.
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_EQNE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_EQNE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_EQNE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): the predicate below parses as (UseSSE<=1 && eq) || ne due to
// &&/|| precedence — unlike the integer cmov forms above which parenthesize
// the eq||ne disjunction.  Confirm whether this is intentional.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
// Unsigned flavor: same selection as cmovLL_reg_LEGT, driven by CmpUL flags.
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LEGT(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles -- x87 register form.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  // FIX: parenthesize the BoolTest disjunction. '&&' binds tighter than '||',
  // so previously the BoolTest::gt arm was not gated by UseSSE<=1 and this x87
  // rule stayed live even when UseSSE>=2 (matching the parenthesized style of
  // the VM_Version::supports_cmov() predicates above).
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles -- SSE2 (XMM) register form.
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  // FIX: parenthesized for the same precedence reason as cmovDDPR_reg_LEGT.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats -- x87 register form.
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  // FIX: parenthesized; previously the 'gt' arm matched even when UseSSE!=0.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats -- SSE (XMM) register form.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  // FIX: parenthesized; previously the 'gt' arm matched even with UseSSE==0.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
  match(TailCall jump_target method_ptr);
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
                 _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, $thread$$Register, nullptr, nullptr, nullptr, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}

instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  // FIX: format previously printed "mask_all_evexL_LE32", which does not match
  // this rule's name; keep the debug output consistent with the rule name.
  format %{ "mask_all_evexL_LT32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Safepoint Instruction
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t!
Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    // TEST EAX, [poll-reg+0]: loads from the polling page; a protected page
    // traps here to bring the thread to a safepoint.
    __ testl(rax, Address($poll$$Register, 0));
    // NOTE(review): post_pc is captured but never used below — presumably a
    // leftover from an instruction-length check; confirm before removing.
    address post_pc = __ pc();
    // 0x85 is the opcode byte of TEST r/m32,r32 — verifies the expected
    // 2-byte encoding was emitted (matches the size(2) declaration above).
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
// Loads the current JavaThread* into dst via MacroAssembler::get_thread().
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Active rule: a store immediately followed by a reload of the same value
// from the same address is collapsed to just the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.