1 // 2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // architecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 
//
// SOC = Save-On-Call: The register allocator assumes that these registers
//       can be used without saving upon entry to the method,
//       but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//       must be saved before using them upon entry to the
//       method, but they do not need to be saved at call
//       sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//       must be saved before using them upon entry to the
//       method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick: FPR1 is really st(0), except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed, making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element, so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example.
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
reg_class ebpd_reg( EBP,EDI );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instruction out of Intel's FP stack.
// Class of 32-bit float stack registers (FPR1..FPR7; FPR0/TOS excluded, see above).
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

// Class of 64-bit double stack register pairs (low/high halves must pair up).
reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

// Singleton classes for results that must land in st(0) (FPR1 from the oopMap view).
reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
// Double stack registers excluding FPR1 (st(0)).
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
// Relocation formats used when emitting 32-bit immediates / displacements.
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register.
// Long pairs in this file are laid out two encodings apart (see chunk0 above),
// hence the "+2".
#define HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
#define HIGH_FROM_LOW_ENC(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// No additional register masks need runtime initialization on x86_32.
void reg_mask_init() {}

// Store the 128-bit value (lo, hi) at the 16-byte-aligned address at or
// below 'adr', and return that aligned address. Used because 'double' and
// 'long long' have only 32-bit alignment on x86, but SSE operands need
// 128-bit alignment.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool pointer aims at a 16-byte-aligned 128-bit constant carved out of
// fp_signmask_pool by double_quadword() above:
//   float_signmask_pool   - clears the sign bit of two packed floats (AbsF)
//   double_signmask_pool  - clears the sign bit of a double (AbsD)
//   float_signflip_pool   - flips the sign bit of two packed floats (NegF)
//   double_signflip_pool  - flips the sign bit of a double (NegD)
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call instruction to reset
// FPU/AVX state (an optional 6-byte fldcw and/or a 3-byte vzeroupper).
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size in bytes of the FFree-Float-Stack-All sequence emitted before runtime
// calls; recorded at emission time, -1 until then.
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M (or SIB) byte built from the three bit fields:
// mod (f1, 2 bits), reg/opcode (f2, 3 bits), r/m (f3, 3 bits).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with a condition-code field OR'd in.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Embedded oops must be valid (or the NULL / non-oop sentinel words).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store.
// Emits opcode + ModR/M + SIB addressing [ESP+disp], choosing the 8-bit
// displacement form when disp fits in a signed byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModR/M (+ optional SIB byte + displacement) for a reg,mem operand:
// picks the SIB-less form when there is no index & scale, selects mod 0/1/2
// based on displacement size, and handles the absolute-address (base == -1)
// and [EBP]/[ESP] special encodings.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {            // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Emit a 32-bit reg-to-reg move (MOV r32, r/m32). A self-move is elided.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Post-comiss/ucomiss flag fixup so NaN comparisons read as 'less than'.
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);   // PF set => unordered (NaN) result
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result into dst:
// -1 for 'less than' or unordered (NaN), 0 for equal, 1 for greater.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);   // unordered => keep -1
  __ jcc(Assembler::below, done);    // below => keep -1
  __ setb(Assembler::notEqual, dst); // 0 if equal, 1 if not
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog that MachPrologNode::emit() produces, mirroring
// its framesize bookkeeping so the printed offsets match the emitted code.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


// Emit the method prolog (delegated to MacroAssembler::verified_entry) and
// mark the frame complete; also fixes up the constant-table base offset if
// this compilation has one.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  __ verified_entry(C);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog that MachEpilogNode::emit() produces.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL rsp, poll_offset[thread] \n\t"
              "JA #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

// Emit the method epilog: optional vzeroupper/fldcw state restore, frame
// teardown (ADD ESP / POP EBP), reserved-stack check, and the return-point
// safepoint poll.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize (imm32 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize (imm8 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    MacroAssembler masm(&cbuf);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Real emission: route the slow path through a C2SafepointPollStub.
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//============================================================================= 731 732 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack }; 733 static enum RC rc_class( OptoReg::Name reg ) { 734 735 if( !OptoReg::is_valid(reg) ) return rc_bad; 736 if (OptoReg::is_stack(reg)) return rc_stack; 737 738 VMReg r = OptoReg::as_VMReg(reg); 739 if (r->is_Register()) return rc_int; 740 if (r->is_FloatRegister()) { 741 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 742 return rc_float; 743 } 744 if (r->is_KRegister()) return rc_kreg; 745 assert(r->is_XMMRegister(), "must be"); 746 return rc_xmm; 747 } 748 749 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, 750 int opcode, const char *op_str, int size, outputStream* st ) { 751 if( cbuf ) { 752 emit_opcode (*cbuf, opcode ); 753 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); 754 #ifndef PRODUCT 755 } else if( !do_size ) { 756 if( size != 0 ) st->print("\n\t"); 757 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 758 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 759 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 760 } else { // FLD, FST, PUSH, POP 761 st->print("%s [ESP + #%d]",op_str,offset); 762 } 763 #endif 764 } 765 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 766 return size+3+offset_size; 767 } 768 769 // Helper for XMM registers. Extra opcode bits, limited syntax. 
// Emit/format/size an XMM<->stack spill move (MOVSS/MOVSD or their VEX/EVEX
// forms). A register pair (reg_lo+1 == reg_hi) means a 64-bit double move,
// otherwise a 32-bit float move. Returns the accumulated size in bytes.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // With EVEX (UseAVX > 2) a compressed disp8 may reach offsets beyond 127,
  // so ask the assembler; otherwise plain disp8/disp32 rules apply.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// Emit/format/size an XMM-to-XMM register move (float or double).
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Emit/format/size a 32-bit GPR -> XMM move (MOVD).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// Emit/format/size a 32-bit XMM -> GPR move (MOVD).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Emit/format/size a GPR-to-GPR move (MOV r32,r32 = 0x8B /r; 2 bytes).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 register to the stack. If the source is not already FPR1L
// (top-of-stack) it is first FLDed (pushed), then stored-and-popped (FSTP);
// TOS itself is stored without popping (FST). The EBX_num/EDX_num passed to
// impl_helper are used only for their encodings (3 and 2, per the reg_def
// table above), which become the ModRM reg-field opcode extension selecting
// FSTP (/3) vs FST (/2).
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
947 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo, 948 int src_hi, int dst_hi, uint ireg, outputStream* st); 949 950 void vec_spill_helper(CodeBuffer *cbuf, bool is_load, 951 int stack_offset, int reg, uint ireg, outputStream* st); 952 953 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, 954 int dst_offset, uint ireg, outputStream* st) { 955 if (cbuf) { 956 MacroAssembler _masm(cbuf); 957 switch (ireg) { 958 case Op_VecS: 959 __ pushl(Address(rsp, src_offset)); 960 __ popl (Address(rsp, dst_offset)); 961 break; 962 case Op_VecD: 963 __ pushl(Address(rsp, src_offset)); 964 __ popl (Address(rsp, dst_offset)); 965 __ pushl(Address(rsp, src_offset+4)); 966 __ popl (Address(rsp, dst_offset+4)); 967 break; 968 case Op_VecX: 969 __ movdqu(Address(rsp, -16), xmm0); 970 __ movdqu(xmm0, Address(rsp, src_offset)); 971 __ movdqu(Address(rsp, dst_offset), xmm0); 972 __ movdqu(xmm0, Address(rsp, -16)); 973 break; 974 case Op_VecY: 975 __ vmovdqu(Address(rsp, -32), xmm0); 976 __ vmovdqu(xmm0, Address(rsp, src_offset)); 977 __ vmovdqu(Address(rsp, dst_offset), xmm0); 978 __ vmovdqu(xmm0, Address(rsp, -32)); 979 break; 980 case Op_VecZ: 981 __ evmovdquq(Address(rsp, -64), xmm0, 2); 982 __ evmovdquq(xmm0, Address(rsp, src_offset), 2); 983 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); 984 __ evmovdquq(xmm0, Address(rsp, -64), 2); 985 break; 986 default: 987 ShouldNotReachHere(); 988 } 989 #ifndef PRODUCT 990 } else { 991 switch (ireg) { 992 case Op_VecS: 993 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 994 "popl [rsp + #%d]", 995 src_offset, dst_offset); 996 break; 997 case Op_VecD: 998 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 999 "popq [rsp + #%d]\n\t" 1000 "pushl [rsp + #%d]\n\t" 1001 "popq [rsp + #%d]", 1002 src_offset, dst_offset, src_offset+4, dst_offset+4); 1003 break; 1004 case Op_VecX: 1005 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1006 "movdqu xmm0, [rsp + #%d]\n\t" 1007 
"movdqu [rsp + #%d], xmm0\n\t" 1008 "movdqu xmm0, [rsp - #16]", 1009 src_offset, dst_offset); 1010 break; 1011 case Op_VecY: 1012 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1013 "vmovdqu xmm0, [rsp + #%d]\n\t" 1014 "vmovdqu [rsp + #%d], xmm0\n\t" 1015 "vmovdqu xmm0, [rsp - #32]", 1016 src_offset, dst_offset); 1017 break; 1018 case Op_VecZ: 1019 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1020 "vmovdqu xmm0, [rsp + #%d]\n\t" 1021 "vmovdqu [rsp + #%d], xmm0\n\t" 1022 "vmovdqu xmm0, [rsp - #64]", 1023 src_offset, dst_offset); 1024 break; 1025 default: 1026 ShouldNotReachHere(); 1027 } 1028 #endif 1029 } 1030 } 1031 1032 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1033 // Get registers to move 1034 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1035 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1036 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1037 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1038 1039 enum RC src_second_rc = rc_class(src_second); 1040 enum RC src_first_rc = rc_class(src_first); 1041 enum RC dst_second_rc = rc_class(dst_second); 1042 enum RC dst_first_rc = rc_class(dst_first); 1043 1044 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1045 1046 // Generate spill code! 
1047 int size = 0; 1048 1049 if( src_first == dst_first && src_second == dst_second ) 1050 return size; // Self copy, no move 1051 1052 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) { 1053 uint ireg = ideal_reg(); 1054 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1055 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1056 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1057 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1058 // mem -> mem 1059 int src_offset = ra_->reg2offset(src_first); 1060 int dst_offset = ra_->reg2offset(dst_first); 1061 vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); 1062 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1063 vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st); 1064 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1065 int stack_offset = ra_->reg2offset(dst_first); 1066 vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st); 1067 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1068 int stack_offset = ra_->reg2offset(src_first); 1069 vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st); 1070 } else { 1071 ShouldNotReachHere(); 1072 } 1073 return 0; 1074 } 1075 1076 // -------------------------------------- 1077 // Check for mem-mem move. push/pop to move. 
1078 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1079 if( src_second == dst_first ) { // overlapping stack copy ranges 1080 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1081 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1082 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1083 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1084 } 1085 // move low bits 1086 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1087 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1088 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1089 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1090 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1091 } 1092 return size; 1093 } 1094 1095 // -------------------------------------- 1096 // Check for integer reg-reg copy 1097 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1098 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1099 1100 // Check for integer store 1101 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1102 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1103 1104 // Check for integer load 1105 if( src_first_rc == rc_stack && dst_first_rc == rc_int ) 1106 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1107 1108 // Check for integer reg-xmm reg copy 1109 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1110 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1111 "no 64 bit integer-float reg moves" ); 1112 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1113 } 1114 // -------------------------------------- 1115 // Check for float reg-reg copy 1116 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1117 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1118 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1119 if( cbuf ) { 1120 1121 // Note the mucking with the register encode to compensate for the 0/1 1122 // indexing issue mentioned in a comment in the reg_def sections 1123 // for FPR registers many lines above here. 1124 1125 if( src_first != FPR1L_num ) { 1126 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1127 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1128 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1129 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1130 } else { 1131 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1132 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1133 } 1134 #ifndef PRODUCT 1135 } else if( !do_size ) { 1136 if( size != 0 ) st->print("\n\t"); 1137 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1138 else st->print( "FST %s", Matcher::regName[dst_first]); 1139 #endif 1140 } 1141 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1142 } 1143 1144 // Check for float store 1145 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1146 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1147 } 1148 1149 // Check for float load 1150 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1151 int offset = ra_->reg2offset(src_first); 1152 const char *op_str; 1153 int op; 1154 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1155 op_str = "FLD_D"; 1156 op = 0xDD; 1157 } else { // 32-bit load 1158 op_str = "FLD_S"; 1159 op = 0xD9; 1160 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1161 } 1162 if( cbuf ) { 1163 emit_opcode (*cbuf, op ); 1164 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1165 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1166 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1167 #ifndef PRODUCT 1168 } else if( !do_size ) { 1169 if( size != 0 ) st->print("\n\t"); 1170 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1171 #endif 1172 } 1173 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1174 return size + 3+offset_size+2; 1175 } 1176 1177 // Check for xmm reg-reg copy 1178 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1179 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1180 (src_first+1 == src_second && dst_first+1 == dst_second), 1181 "no non-adjacent float-moves" ); 1182 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1183 } 1184 1185 // Check for xmm reg-integer reg copy 1186 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1187 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1188 "no 64 bit float-integer reg moves" ); 1189 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1190 } 1191 1192 // Check for xmm store 1193 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1194 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st); 1195 } 1196 1197 // Check for float xmm load 1198 if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1199 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1200 } 1201 1202 // Copy from float reg to xmm reg 1203 if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) { 1204 
// copy to the top of stack from floating point reg 1205 // and use LEA to preserve flags 1206 if( cbuf ) { 1207 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1208 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1209 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1210 emit_d8(*cbuf,0xF8); 1211 #ifndef PRODUCT 1212 } else if( !do_size ) { 1213 if( size != 0 ) st->print("\n\t"); 1214 st->print("LEA ESP,[ESP-8]"); 1215 #endif 1216 } 1217 size += 4; 1218 1219 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1220 1221 // Copy from the temp memory to the xmm reg. 1222 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1223 1224 if( cbuf ) { 1225 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1226 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1227 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1228 emit_d8(*cbuf,0x08); 1229 #ifndef PRODUCT 1230 } else if( !do_size ) { 1231 if( size != 0 ) st->print("\n\t"); 1232 st->print("LEA ESP,[ESP+8]"); 1233 #endif 1234 } 1235 size += 4; 1236 return size; 1237 } 1238 1239 // AVX-512 opmask specific spilling. 
1240 if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) { 1241 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1242 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1243 int offset = ra_->reg2offset(src_first); 1244 if (cbuf != nullptr) { 1245 MacroAssembler _masm(cbuf); 1246 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); 1247 #ifndef PRODUCT 1248 } else { 1249 st->print("KMOV %s, [ESP + %d]", Matcher::regName[dst_first], offset); 1250 #endif 1251 } 1252 return 0; 1253 } 1254 1255 if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) { 1256 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1257 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1258 int offset = ra_->reg2offset(dst_first); 1259 if (cbuf != nullptr) { 1260 MacroAssembler _masm(cbuf); 1261 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first])); 1262 #ifndef PRODUCT 1263 } else { 1264 st->print("KMOV [ESP + %d], %s", offset, Matcher::regName[src_first]); 1265 #endif 1266 } 1267 return 0; 1268 } 1269 1270 if (src_first_rc == rc_kreg && dst_first_rc == rc_int) { 1271 Unimplemented(); 1272 return 0; 1273 } 1274 1275 if (src_first_rc == rc_int && dst_first_rc == rc_kreg) { 1276 Unimplemented(); 1277 return 0; 1278 } 1279 1280 if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) { 1281 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1282 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1283 if (cbuf != nullptr) { 1284 MacroAssembler _masm(cbuf); 1285 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first])); 1286 #ifndef PRODUCT 1287 } else { 1288 st->print("KMOV %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]); 1289 #endif 1290 } 1291 return 0; 1292 } 1293 1294 
assert( size > 0, "missed a case" ); 1295 1296 // -------------------------------------------------------------------- 1297 // Check for second bits still needing moving. 1298 if( src_second == dst_second ) 1299 return size; // Self copy; no move 1300 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1301 1302 // Check for second word int-int move 1303 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1304 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1305 1306 // Check for second word integer store 1307 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1308 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1309 1310 // Check for second word integer load 1311 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1312 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1313 1314 Unimplemented(); 1315 return 0; // Mute compiler 1316 } 1317 1318 #ifndef PRODUCT 1319 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1320 implementation( nullptr, ra_, false, st ); 1321 } 1322 #endif 1323 1324 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1325 implementation( &cbuf, ra_, false, nullptr ); 1326 } 1327 1328 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1329 return MachNode::size(ra_); 1330 } 1331 1332 1333 //============================================================================= 1334 #ifndef PRODUCT 1335 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1336 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1337 int reg = ra_->get_reg_first(this); 1338 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1339 } 1340 #endif 1341 1342 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1343 int offset = 
ra_->reg2offset(in_RegMask(0).find_first_elem()); 1344 int reg = ra_->get_encode(this); 1345 if( offset >= 128 ) { 1346 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1347 emit_rm(cbuf, 0x2, reg, 0x04); 1348 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1349 emit_d32(cbuf, offset); 1350 } 1351 else { 1352 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1353 emit_rm(cbuf, 0x1, reg, 0x04); 1354 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1355 emit_d8(cbuf, offset); 1356 } 1357 } 1358 1359 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1360 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1361 if( offset >= 128 ) { 1362 return 7; 1363 } 1364 else { 1365 return 4; 1366 } 1367 } 1368 1369 //============================================================================= 1370 #ifndef PRODUCT 1371 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1372 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1373 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1374 st->print_cr("\tNOP"); 1375 st->print_cr("\tNOP"); 1376 if( !OptoBreakpoint ) 1377 st->print_cr("\tNOP"); 1378 } 1379 #endif 1380 1381 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1382 MacroAssembler masm(&cbuf); 1383 #ifdef ASSERT 1384 uint insts_size = cbuf.insts_size(); 1385 #endif 1386 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1387 masm.jump_cc(Assembler::notEqual, 1388 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1389 /* WARNING these NOPs are critical so that verified entry point is properly 1390 aligned for patching by NativeJump::patch_verified_entry() */ 1391 int nops_cnt = 2; 1392 if( !OptoBreakpoint ) // Leave space for int3 1393 nops_cnt += 1; 1394 masm.nop(nops_cnt); 1395 1396 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1397 } 1398 1399 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1400 return OptoBreakpoint ? 
11 : 12; 1401 } 1402 1403 1404 //============================================================================= 1405 1406 // Vector calling convention not supported. 1407 bool Matcher::supports_vector_calling_convention() { 1408 return false; 1409 } 1410 1411 OptoRegPair Matcher::vector_return_value(uint ideal_reg) { 1412 Unimplemented(); 1413 return OptoRegPair(0, 0); 1414 } 1415 1416 // Is this branch offset short enough that a short branch can be used? 1417 // 1418 // NOTE: If the platform does not provide any short branch variants, then 1419 // this method should return false for offset 0. 1420 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1421 // The passed offset is relative to address of the branch. 1422 // On 86 a branch displacement is calculated relative to address 1423 // of a next instruction. 1424 offset -= br_size; 1425 1426 // the short version of jmpConUCF2 contains multiple branches, 1427 // making the reach slightly less 1428 if (rule == jmpConUCF2_rule) 1429 return (-126 <= offset && offset <= 125); 1430 return (-128 <= offset && offset <= 127); 1431 } 1432 1433 // Return whether or not this register is ever used as an argument. This 1434 // function is used on startup to build the trampoline stubs in generateOptoStub. 1435 // Registers not mentioned will be killed by the VM call in the trampoline, and 1436 // arguments in those registers not be available to the callee. 1437 bool Matcher::can_be_java_arg( int reg ) { 1438 if( reg == ECX_num || reg == EDX_num ) return true; 1439 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; 1440 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; 1441 return false; 1442 } 1443 1444 bool Matcher::is_spillable_arg( int reg ) { 1445 return can_be_java_arg(reg); 1446 } 1447 1448 uint Matcher::int_pressure_limit() 1449 { 1450 return (INTPRESSURE == -1) ? 
6 : INTPRESSURE; 1451 } 1452 1453 uint Matcher::float_pressure_limit() 1454 { 1455 return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE; 1456 } 1457 1458 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { 1459 // Use hardware integer DIV instruction when 1460 // it is faster than a code which use multiply. 1461 // Only when constant divisor fits into 32 bit 1462 // (min_jint is excluded to get only correct 1463 // positive 32 bit values from negative). 1464 return VM_Version::has_fast_idiv() && 1465 (divisor == (int)divisor && divisor != min_jint); 1466 } 1467 1468 // Register for DIVI projection of divmodI 1469 RegMask Matcher::divI_proj_mask() { 1470 return EAX_REG_mask(); 1471 } 1472 1473 // Register for MODI projection of divmodI 1474 RegMask Matcher::modI_proj_mask() { 1475 return EDX_REG_mask(); 1476 } 1477 1478 // Register for DIVL projection of divmodL 1479 RegMask Matcher::divL_proj_mask() { 1480 ShouldNotReachHere(); 1481 return RegMask(); 1482 } 1483 1484 // Register for MODL projection of divmodL 1485 RegMask Matcher::modL_proj_mask() { 1486 ShouldNotReachHere(); 1487 return RegMask(); 1488 } 1489 1490 const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1491 return NO_REG_mask(); 1492 } 1493 1494 // Returns true if the high 32 bits of the value is known to be zero. 1495 bool is_operand_hi32_zero(Node* n) { 1496 int opc = n->Opcode(); 1497 if (opc == Op_AndL) { 1498 Node* o2 = n->in(2); 1499 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1500 return true; 1501 } 1502 } 1503 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1504 return true; 1505 } 1506 return false; 1507 } 1508 1509 %} 1510 1511 //----------ENCODING BLOCK----------------------------------------------------- 1512 // This block specifies the encoding classes used by the compiler to output 1513 // byte streams. 
Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction. Operands specify their base encoding interface with the
// interface keyword. There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
// operand to generate a function which returns its register number when
// queried. CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried. MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried. COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding. They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode. Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block. Emit functions will live in the
  // main source block for now. In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix (selects 16-bit operands).
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // Emit a mod=11 ModRM byte for a register-to-register form.
  enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Emit an explicit opcode byte followed by a reg,reg ModRM byte.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32, 0 via the immediate form (0xB8+rd), which — unlike XOR —
  // leaves the condition flags untouched.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( cbuf, 0x0 ); // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                        special case
    //
    // input : rax,: dividend                     min_int
    //         reg:  divisor                      -1
    //
    // output: rax,: quotient  (= rax, idiv reg)  min_int
    //         rdx:  remainder (= rax, irem reg)  0
    //
    // Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    // normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1633 // Check for 8-bit immediate, and set sign extend bit in opcode 1634 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1635 emit_opcode(cbuf, $primary | 0x02); } 1636 else { // If 32-bit immediate 1637 emit_opcode(cbuf, $primary); 1638 } 1639 // Emit r/m byte with secondary opcode, after primary opcode. 1640 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1641 %} 1642 1643 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1644 // Check for 8-bit immediate, and set sign extend bit in opcode 1645 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1646 $$$emit8$imm$$constant; 1647 } 1648 else { // If 32-bit immediate 1649 // Output immediate 1650 $$$emit32$imm$$constant; 1651 } 1652 %} 1653 1654 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1655 // Emit primary opcode and set sign-extend bit 1656 // Check for 8-bit immediate, and set sign extend bit in opcode 1657 int con = (int)$imm$$constant; // Throw away top bits 1658 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1659 // Emit r/m byte with secondary opcode, after primary opcode. 1660 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1661 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1662 else emit_d32(cbuf,con); 1663 %} 1664 1665 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1666 // Emit primary opcode and set sign-extend bit 1667 // Check for 8-bit immediate, and set sign extend bit in opcode 1668 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1669 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1670 // Emit r/m byte with tertiary opcode, after primary opcode. 
1671 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg)); 1672 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1673 else emit_d32(cbuf,con); 1674 %} 1675 1676 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1677 emit_cc(cbuf, $secondary, $dst$$reg ); 1678 %} 1679 1680 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1681 int destlo = $dst$$reg; 1682 int desthi = HIGH_FROM_LOW_ENC(destlo); 1683 // bswap lo 1684 emit_opcode(cbuf, 0x0F); 1685 emit_cc(cbuf, 0xC8, destlo); 1686 // bswap hi 1687 emit_opcode(cbuf, 0x0F); 1688 emit_cc(cbuf, 0xC8, desthi); 1689 // xchg lo and hi 1690 emit_opcode(cbuf, 0x87); 1691 emit_rm(cbuf, 0x3, destlo, desthi); 1692 %} 1693 1694 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1695 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1696 %} 1697 1698 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1699 $$$emit8$primary; 1700 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1701 %} 1702 1703 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1704 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1705 emit_d8(cbuf, op >> 8 ); 1706 emit_d8(cbuf, op & 255); 1707 %} 1708 1709 // emulate a CMOV with a conditional branch around a MOV 1710 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1711 // Invert sense of branch from sense of CMOV 1712 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1713 emit_d8( cbuf, $brOffs$$constant ); 1714 %} 1715 1716 enc_class enc_PartialSubtypeCheck( ) %{ 1717 Register Redi = as_Register(EDI_enc); // result register 1718 Register Reax = as_Register(EAX_enc); // super class 1719 Register Recx = as_Register(ECX_enc); // killed 1720 Register Resi = as_Register(ESI_enc); // sub class 1721 Label miss; 1722 1723 MacroAssembler _masm(&cbuf); 1724 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1725 nullptr, &miss, 1726 /*set_cond_codes:*/ true); 1727 if ($primary) { 1728 __ xorptr(Redi, Redi); 1729 } 1730 __ bind(miss); 1731 %} 1732 1733 enc_class FFree_Float_Stack_All %{ // 
Free_Float_Stack_All 1734 MacroAssembler masm(&cbuf); 1735 int start = masm.offset(); 1736 if (UseSSE >= 2) { 1737 if (VerifyFPU) { 1738 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1739 } 1740 } else { 1741 // External c_calling_convention expects the FPU stack to be 'clean'. 1742 // Compiled code leaves it dirty. Do cleanup now. 1743 masm.empty_FPU_stack(); 1744 } 1745 if (sizeof_FFree_Float_Stack_All == -1) { 1746 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1747 } else { 1748 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1749 } 1750 %} 1751 1752 enc_class Verify_FPU_For_Leaf %{ 1753 if( VerifyFPU ) { 1754 MacroAssembler masm(&cbuf); 1755 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1756 } 1757 %} 1758 1759 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1760 // This is the instruction starting address for relocation info. 1761 MacroAssembler _masm(&cbuf); 1762 cbuf.set_insts_mark(); 1763 $$$emit8$primary; 1764 // CALL directly to the runtime 1765 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1766 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1767 __ post_call_nop(); 1768 1769 if (UseSSE >= 2) { 1770 MacroAssembler _masm(&cbuf); 1771 BasicType rt = tf()->return_type(); 1772 1773 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1774 // A C runtime call where the return value is unused. In SSE2+ 1775 // mode the result needs to be removed from the FPU stack. It's 1776 // likely that this function call could be removed by the 1777 // optimizer if the C function is a pure function. 
1778 __ ffree(0); 1779 } else if (rt == T_FLOAT) { 1780 __ lea(rsp, Address(rsp, -4)); 1781 __ fstp_s(Address(rsp, 0)); 1782 __ movflt(xmm0, Address(rsp, 0)); 1783 __ lea(rsp, Address(rsp, 4)); 1784 } else if (rt == T_DOUBLE) { 1785 __ lea(rsp, Address(rsp, -8)); 1786 __ fstp_d(Address(rsp, 0)); 1787 __ movdbl(xmm0, Address(rsp, 0)); 1788 __ lea(rsp, Address(rsp, 8)); 1789 } 1790 } 1791 %} 1792 1793 enc_class pre_call_resets %{ 1794 // If method sets FPU control word restore it here 1795 debug_only(int off0 = cbuf.insts_size()); 1796 if (ra_->C->in_24_bit_fp_mode()) { 1797 MacroAssembler _masm(&cbuf); 1798 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 1799 } 1800 // Clear upper bits of YMM registers when current compiled code uses 1801 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1802 MacroAssembler _masm(&cbuf); 1803 __ vzeroupper(); 1804 debug_only(int off1 = cbuf.insts_size()); 1805 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1806 %} 1807 1808 enc_class post_call_FPU %{ 1809 // If method sets FPU control word do it here also 1810 if (Compile::current()->in_24_bit_fp_mode()) { 1811 MacroAssembler masm(&cbuf); 1812 masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 1813 } 1814 %} 1815 1816 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1817 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1818 // who we intended to call. 1819 MacroAssembler _masm(&cbuf); 1820 cbuf.set_insts_mark(); 1821 $$$emit8$primary; 1822 1823 if (!_method) { 1824 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1825 runtime_call_Relocation::spec(), 1826 RELOC_IMM32); 1827 __ post_call_nop(); 1828 } else { 1829 int method_index = resolved_method_index(cbuf); 1830 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1831 : static_call_Relocation::spec(method_index); 1832 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1833 rspec, RELOC_DISP32); 1834 __ post_call_nop(); 1835 address mark = cbuf.insts_mark(); 1836 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) { 1837 // Calls of the same statically bound method can share 1838 // a stub to the interpreter. 1839 cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off()); 1840 } else { 1841 // Emit stubs for static call. 1842 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark); 1843 if (stub == nullptr) { 1844 ciEnv::current()->record_failure("CodeCache is full"); 1845 return; 1846 } 1847 } 1848 } 1849 %} 1850 1851 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1852 MacroAssembler _masm(&cbuf); 1853 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1854 __ post_call_nop(); 1855 %} 1856 1857 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1858 int disp = in_bytes(Method::from_compiled_offset()); 1859 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1860 1861 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1862 MacroAssembler _masm(&cbuf); 1863 cbuf.set_insts_mark(); 1864 $$$emit8$primary; 1865 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1866 emit_d8(cbuf, disp); // Displacement 1867 __ post_call_nop(); 1868 %} 1869 1870 // Following encoding is no longer used, but may be restored if calling 1871 // convention changes significantly. 
1872 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1873 // 1874 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1875 // // int ic_reg = Matcher::inline_cache_reg(); 1876 // // int ic_encode = Matcher::_regEncode[ic_reg]; 1877 // // int imo_reg = Matcher::interpreter_method_reg(); 1878 // // int imo_encode = Matcher::_regEncode[imo_reg]; 1879 // 1880 // // // Interpreter expects method_ptr in EBX, currently a callee-saved register, 1881 // // // so we load it immediately before the call 1882 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_ptr 1883 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1884 // 1885 // // xor rbp,ebp 1886 // emit_opcode(cbuf, 0x33); 1887 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1888 // 1889 // // CALL to interpreter. 1890 // cbuf.set_insts_mark(); 1891 // $$$emit8$primary; 1892 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1893 // runtime_call_Relocation::spec(), RELOC_IMM32 ); 1894 // %} 1895 1896 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1897 $$$emit8$primary; 1898 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1899 $$$emit8$shift$$constant; 1900 %} 1901 1902 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1903 // Load immediate does not have a zero or sign extended version 1904 // for 8-bit immediates 1905 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1906 $$$emit32$src$$constant; 1907 %} 1908 1909 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1910 // Load immediate does not have a zero or sign extended version 1911 // for 8-bit immediates 1912 emit_opcode(cbuf, $primary + $dst$$reg); 1913 $$$emit32$src$$constant; 1914 %} 1915 1916 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1917 // Load immediate does not have a zero or sign extended version 1918 // for 8-bit immediates 1919 int dst_enc = $dst$$reg; 1920 int src_con = $src$$constant & 0x0FFFFFFFFL; 1921 if (src_con == 0) { 1922 // xor dst, dst 1923 
emit_opcode(cbuf, 0x33); 1924 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1925 } else { 1926 emit_opcode(cbuf, $primary + dst_enc); 1927 emit_d32(cbuf, src_con); 1928 } 1929 %} 1930 1931 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 1932 // Load immediate does not have a zero or sign extended version 1933 // for 8-bit immediates 1934 int dst_enc = $dst$$reg + 2; 1935 int src_con = ((julong)($src$$constant)) >> 32; 1936 if (src_con == 0) { 1937 // xor dst, dst 1938 emit_opcode(cbuf, 0x33); 1939 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1940 } else { 1941 emit_opcode(cbuf, $primary + dst_enc); 1942 emit_d32(cbuf, src_con); 1943 } 1944 %} 1945 1946 1947 // Encode a reg-reg copy. If it is useless, then empty encoding. 1948 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 1949 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 1950 %} 1951 1952 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 1953 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 1954 %} 1955 1956 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 1957 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1958 %} 1959 1960 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 1961 $$$emit8$primary; 1962 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1963 %} 1964 1965 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 1966 $$$emit8$secondary; 1967 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg)); 1968 %} 1969 1970 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 1971 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1972 %} 1973 1974 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 1975 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg)); 1976 %} 1977 1978 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 1979 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg)); 1980 %} 1981 1982 enc_class Con32 (immI src) %{ // Con32(storeImmI) 1983 // Output immediate 1984 $$$emit32$src$$constant; 1985 %} 1986 1987 enc_class 
Con32FPR_as_bits(immFPR src) %{ // storeF_imm 1988 // Output Float immediate bits 1989 jfloat jf = $src$$constant; 1990 int jf_as_bits = jint_cast( jf ); 1991 emit_d32(cbuf, jf_as_bits); 1992 %} 1993 1994 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 1995 // Output Float immediate bits 1996 jfloat jf = $src$$constant; 1997 int jf_as_bits = jint_cast( jf ); 1998 emit_d32(cbuf, jf_as_bits); 1999 %} 2000 2001 enc_class Con16 (immI src) %{ // Con16(storeImmI) 2002 // Output immediate 2003 $$$emit16$src$$constant; 2004 %} 2005 2006 enc_class Con_d32(immI src) %{ 2007 emit_d32(cbuf,$src$$constant); 2008 %} 2009 2010 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 2011 // Output immediate memory reference 2012 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 2013 emit_d32(cbuf, 0x00); 2014 %} 2015 2016 enc_class lock_prefix( ) %{ 2017 emit_opcode(cbuf,0xF0); // [Lock] 2018 %} 2019 2020 // Cmp-xchg long value. 2021 // Note: we need to swap rbx, and rcx before and after the 2022 // cmpxchg8 instruction because the instruction uses 2023 // rcx as the high order word of the new value to store but 2024 // our register encoding uses rbx,. 
2025 enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{ 2026 2027 // XCHG rbx,ecx 2028 emit_opcode(cbuf,0x87); 2029 emit_opcode(cbuf,0xD9); 2030 // [Lock] 2031 emit_opcode(cbuf,0xF0); 2032 // CMPXCHG8 [Eptr] 2033 emit_opcode(cbuf,0x0F); 2034 emit_opcode(cbuf,0xC7); 2035 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2036 // XCHG rbx,ecx 2037 emit_opcode(cbuf,0x87); 2038 emit_opcode(cbuf,0xD9); 2039 %} 2040 2041 enc_class enc_cmpxchg(eSIRegP mem_ptr) %{ 2042 // [Lock] 2043 emit_opcode(cbuf,0xF0); 2044 2045 // CMPXCHG [Eptr] 2046 emit_opcode(cbuf,0x0F); 2047 emit_opcode(cbuf,0xB1); 2048 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2049 %} 2050 2051 enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{ 2052 // [Lock] 2053 emit_opcode(cbuf,0xF0); 2054 2055 // CMPXCHGB [Eptr] 2056 emit_opcode(cbuf,0x0F); 2057 emit_opcode(cbuf,0xB0); 2058 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2059 %} 2060 2061 enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{ 2062 // [Lock] 2063 emit_opcode(cbuf,0xF0); 2064 2065 // 16-bit mode 2066 emit_opcode(cbuf, 0x66); 2067 2068 // CMPXCHGW [Eptr] 2069 emit_opcode(cbuf,0x0F); 2070 emit_opcode(cbuf,0xB1); 2071 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2072 %} 2073 2074 enc_class enc_flags_ne_to_boolean( iRegI res ) %{ 2075 int res_encoding = $res$$reg; 2076 2077 // MOV res,0 2078 emit_opcode( cbuf, 0xB8 + res_encoding); 2079 emit_d32( cbuf, 0 ); 2080 // JNE,s fail 2081 emit_opcode(cbuf,0x75); 2082 emit_d8(cbuf, 5 ); 2083 // MOV res,1 2084 emit_opcode( cbuf, 0xB8 + res_encoding); 2085 emit_d32( cbuf, 1 ); 2086 // fail: 2087 %} 2088 2089 enc_class set_instruction_start( ) %{ 2090 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2091 %} 2092 2093 enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem 2094 int reg_encoding = $ereg$$reg; 2095 int base = $mem$$base; 2096 int index = $mem$$index; 2097 int scale = $mem$$scale; 2098 int displace = $mem$$disp; 2099 relocInfo::relocType disp_reloc = $mem->disp_reloc(); 2100 encode_RegMem(cbuf, reg_encoding, base, 
index, scale, displace, disp_reloc); 2101 %} 2102 2103 enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem 2104 int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg); // Hi register of pair, computed from lo 2105 int base = $mem$$base; 2106 int index = $mem$$index; 2107 int scale = $mem$$scale; 2108 int displace = $mem$$disp + 4; // Offset is 4 further in memory 2109 assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" ); 2110 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none); 2111 %} 2112 2113 enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{ 2114 int r1, r2; 2115 if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW_ENC($dst$$reg); } 2116 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW_ENC($dst$$reg); } 2117 emit_opcode(cbuf,0x0F); 2118 emit_opcode(cbuf,$tertiary); 2119 emit_rm(cbuf, 0x3, r1, r2); 2120 emit_d8(cbuf,$cnt$$constant); 2121 emit_d8(cbuf,$primary); 2122 emit_rm(cbuf, 0x3, $secondary, r1); 2123 emit_d8(cbuf,$cnt$$constant); 2124 %} 2125 2126 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ 2127 emit_opcode( cbuf, 0x8B ); // Move 2128 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg)); 2129 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2130 emit_d8(cbuf,$primary); 2131 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 2132 emit_d8(cbuf,$cnt$$constant-32); 2133 } 2134 emit_d8(cbuf,$primary); 2135 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg)); 2136 emit_d8(cbuf,31); 2137 %} 2138 2139 enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{ 2140 int r1, r2; 2141 if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW_ENC($dst$$reg); } 2142 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW_ENC($dst$$reg); } 2143 2144 emit_opcode( cbuf, 0x8B ); // Move r1,r2 2145 emit_rm(cbuf, 0x3, r1, r2); 2146 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2147 emit_opcode(cbuf,$primary); 2148 emit_rm(cbuf, 0x3, $secondary, r1); 2149 
emit_d8(cbuf,$cnt$$constant-32); 2150 } 2151 emit_opcode(cbuf,0x33); // XOR r2,r2 2152 emit_rm(cbuf, 0x3, r2, r2); 2153 %} 2154 2155 // Clone of RegMem but accepts an extra parameter to access each 2156 // half of a double in memory; it never needs relocation info. 2157 enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{ 2158 emit_opcode(cbuf,$opcode$$constant); 2159 int reg_encoding = $rm_reg$$reg; 2160 int base = $mem$$base; 2161 int index = $mem$$index; 2162 int scale = $mem$$scale; 2163 int displace = $mem$$disp + $disp_for_half$$constant; 2164 relocInfo::relocType disp_reloc = relocInfo::none; 2165 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2166 %} 2167 2168 // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!! 2169 // 2170 // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant 2171 // and it never needs relocation information. 2172 // Frequently used to move data between FPU's Stack Top and memory. 
2173 enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{ 2174 int rm_byte_opcode = $rm_opcode$$constant; 2175 int base = $mem$$base; 2176 int index = $mem$$index; 2177 int scale = $mem$$scale; 2178 int displace = $mem$$disp; 2179 assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" ); 2180 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none); 2181 %} 2182 2183 enc_class RMopc_Mem (immI rm_opcode, memory mem) %{ 2184 int rm_byte_opcode = $rm_opcode$$constant; 2185 int base = $mem$$base; 2186 int index = $mem$$index; 2187 int scale = $mem$$scale; 2188 int displace = $mem$$disp; 2189 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2190 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 2191 %} 2192 2193 enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea 2194 int reg_encoding = $dst$$reg; 2195 int base = $src0$$reg; // 0xFFFFFFFF indicates no base 2196 int index = 0x04; // 0x04 indicates no index 2197 int scale = 0x00; // 0x00 indicates no scale 2198 int displace = $src1$$constant; // 0x00 indicates no displacement 2199 relocInfo::relocType disp_reloc = relocInfo::none; 2200 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2201 %} 2202 2203 enc_class min_enc (rRegI dst, rRegI src) %{ // MIN 2204 // Compare dst,src 2205 emit_opcode(cbuf,0x3B); 2206 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2207 // jmp dst < src around move 2208 emit_opcode(cbuf,0x7C); 2209 emit_d8(cbuf,2); 2210 // move dst,src 2211 emit_opcode(cbuf,0x8B); 2212 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2213 %} 2214 2215 enc_class max_enc (rRegI dst, rRegI src) %{ // MAX 2216 // Compare dst,src 2217 emit_opcode(cbuf,0x3B); 2218 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2219 // jmp dst > src around move 2220 emit_opcode(cbuf,0x7F); 2221 emit_d8(cbuf,2); 2222 // move dst,src 2223 emit_opcode(cbuf,0x8B); 
2224 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2225 %} 2226 2227 enc_class enc_FPR_store(memory mem, regDPR src) %{ 2228 // If src is FPR1, we can just FST to store it. 2229 // Else we need to FLD it to FPR1, then FSTP to store/pop it. 2230 int reg_encoding = 0x2; // Just store 2231 int base = $mem$$base; 2232 int index = $mem$$index; 2233 int scale = $mem$$scale; 2234 int displace = $mem$$disp; 2235 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2236 if( $src$$reg != FPR1L_enc ) { 2237 reg_encoding = 0x3; // Store & pop 2238 emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it) 2239 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2240 } 2241 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2242 emit_opcode(cbuf,$primary); 2243 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2244 %} 2245 2246 enc_class neg_reg(rRegI dst) %{ 2247 // NEG $dst 2248 emit_opcode(cbuf,0xF7); 2249 emit_rm(cbuf, 0x3, 0x03, $dst$$reg ); 2250 %} 2251 2252 enc_class setLT_reg(eCXRegI dst) %{ 2253 // SETLT $dst 2254 emit_opcode(cbuf,0x0F); 2255 emit_opcode(cbuf,0x9C); 2256 emit_rm( cbuf, 0x3, 0x4, $dst$$reg ); 2257 %} 2258 2259 enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT 2260 int tmpReg = $tmp$$reg; 2261 2262 // SUB $p,$q 2263 emit_opcode(cbuf,0x2B); 2264 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg); 2265 // SBB $tmp,$tmp 2266 emit_opcode(cbuf,0x1B); 2267 emit_rm(cbuf, 0x3, tmpReg, tmpReg); 2268 // AND $tmp,$y 2269 emit_opcode(cbuf,0x23); 2270 emit_rm(cbuf, 0x3, tmpReg, $y$$reg); 2271 // ADD $p,$tmp 2272 emit_opcode(cbuf,0x03); 2273 emit_rm(cbuf, 0x3, $p$$reg, tmpReg); 2274 %} 2275 2276 enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{ 2277 // TEST shift,32 2278 emit_opcode(cbuf,0xF7); 2279 emit_rm(cbuf, 0x3, 0, ECX_enc); 2280 emit_d32(cbuf,0x20); 2281 // JEQ,s small 2282 emit_opcode(cbuf, 0x74); 2283 emit_d8(cbuf, 0x04); 2284 // MOV $dst.hi,$dst.lo 2285 
emit_opcode( cbuf, 0x8B ); 2286 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg ); 2287 // CLR $dst.lo 2288 emit_opcode(cbuf, 0x33); 2289 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg); 2290 // small: 2291 // SHLD $dst.hi,$dst.lo,$shift 2292 emit_opcode(cbuf,0x0F); 2293 emit_opcode(cbuf,0xA5); 2294 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg)); 2295 // SHL $dst.lo,$shift" 2296 emit_opcode(cbuf,0xD3); 2297 emit_rm(cbuf, 0x3, 0x4, $dst$$reg ); 2298 %} 2299 2300 enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{ 2301 // TEST shift,32 2302 emit_opcode(cbuf,0xF7); 2303 emit_rm(cbuf, 0x3, 0, ECX_enc); 2304 emit_d32(cbuf,0x20); 2305 // JEQ,s small 2306 emit_opcode(cbuf, 0x74); 2307 emit_d8(cbuf, 0x04); 2308 // MOV $dst.lo,$dst.hi 2309 emit_opcode( cbuf, 0x8B ); 2310 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2311 // CLR $dst.hi 2312 emit_opcode(cbuf, 0x33); 2313 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg)); 2314 // small: 2315 // SHRD $dst.lo,$dst.hi,$shift 2316 emit_opcode(cbuf,0x0F); 2317 emit_opcode(cbuf,0xAD); 2318 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg); 2319 // SHR $dst.hi,$shift" 2320 emit_opcode(cbuf,0xD3); 2321 emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) ); 2322 %} 2323 2324 enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ 2325 // TEST shift,32 2326 emit_opcode(cbuf,0xF7); 2327 emit_rm(cbuf, 0x3, 0, ECX_enc); 2328 emit_d32(cbuf,0x20); 2329 // JEQ,s small 2330 emit_opcode(cbuf, 0x74); 2331 emit_d8(cbuf, 0x05); 2332 // MOV $dst.lo,$dst.hi 2333 emit_opcode( cbuf, 0x8B ); 2334 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); 2335 // SAR $dst.hi,31 2336 emit_opcode(cbuf, 0xC1); 2337 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) ); 2338 emit_d8(cbuf, 0x1F ); 2339 // small: 2340 // SHRD $dst.lo,$dst.hi,$shift 2341 emit_opcode(cbuf,0x0F); 2342 emit_opcode(cbuf,0xAD); 2343 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg); 
2344 // SAR $dst.hi,$shift" 2345 emit_opcode(cbuf,0xD3); 2346 emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) ); 2347 %} 2348 2349 2350 // ----------------- Encodings for floating point unit ----------------- 2351 // May leave result in FPU-TOS or FPU reg depending on opcodes 2352 enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV 2353 $$$emit8$primary; 2354 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); 2355 %} 2356 2357 // Pop argument in FPR0 with FSTP ST(0) 2358 enc_class PopFPU() %{ 2359 emit_opcode( cbuf, 0xDD ); 2360 emit_d8( cbuf, 0xD8 ); 2361 %} 2362 2363 // !!!!! equivalent to Pop_Reg_F 2364 enc_class Pop_Reg_DPR( regDPR dst ) %{ 2365 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2366 emit_d8( cbuf, 0xD8+$dst$$reg ); 2367 %} 2368 2369 enc_class Push_Reg_DPR( regDPR dst ) %{ 2370 emit_opcode( cbuf, 0xD9 ); 2371 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 2372 %} 2373 2374 enc_class strictfp_bias1( regDPR dst ) %{ 2375 emit_opcode( cbuf, 0xDB ); // FLD m80real 2376 emit_opcode( cbuf, 0x2D ); 2377 emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() ); 2378 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2379 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2380 %} 2381 2382 enc_class strictfp_bias2( regDPR dst ) %{ 2383 emit_opcode( cbuf, 0xDB ); // FLD m80real 2384 emit_opcode( cbuf, 0x2D ); 2385 emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() ); 2386 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2387 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2388 %} 2389 2390 // Special case for moving an integer register to a stack slot. 2391 enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2392 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp ); 2393 %} 2394 2395 // Special case for moving a register to a stack slot. 
2396 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2397 // Opcode already emitted 2398 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2399 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2400 emit_d32(cbuf, $dst$$disp); // Displacement 2401 %} 2402 2403 // Push the integer in stackSlot 'src' onto FP-stack 2404 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2405 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2406 %} 2407 2408 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2409 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2410 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2411 %} 2412 2413 // Same as Pop_Mem_F except for opcode 2414 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2415 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2416 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2417 %} 2418 2419 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2420 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2421 emit_d8( cbuf, 0xD8+$dst$$reg ); 2422 %} 2423 2424 enc_class Push_Reg_FPR( regFPR dst ) %{ 2425 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2426 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2427 %} 2428 2429 // Push FPU's float to a stack-slot, and pop FPU-stack 2430 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2431 int pop = 0x02; 2432 if ($src$$reg != FPR1L_enc) { 2433 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2434 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2435 pop = 0x03; 2436 } 2437 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2438 %} 2439 2440 // Push FPU's double to a stack-slot, and pop FPU-stack 2441 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2442 int pop = 0x02; 2443 if ($src$$reg != FPR1L_enc) { 2444 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2445 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2446 pop = 0x03; 2447 } 2448 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2449 %} 2450 2451 // Push FPU's double to 
  // (continuation of a comment begun above this chunk)
  // ... a FPU-stack-slot, and pop FPU-stack

  // Copy src into dst's FPU stack slot, then pop the FPU stack.
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;                        // use FSTP (pop) instead of FST
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}

  // Load dst into FPR0 (TOS); if src is not already FPR1, rotate the FPU
  // stack so src sits at FPR1 for the destructive op that follows.
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Push both XMM doubles onto the x87 stack via a stack-memory bounce;
  // src1 is pushed first so src0 ends up at TOS.
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Float variant of Push_ModD_encoding: 4-byte bounce slot.
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the x87 result back into an XMM register and release the bounce slot.
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Float variant; the slot size to release is passed as the d8 constant.
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  // Push a single XMM double onto the x87 stack via a new bounce slot.
  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Reserve an 8-byte scratch slot on the CPU stack.
  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  // Release the 8-byte scratch slot.
  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Copy an XMM double to the existing scratch slot and load it to x87 TOS.
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Rotate the FPU stack so src sits at FPR1; the actual store is done by a
  // following Pop_Reg_F / Pop_Mem_F encoding (see retained commented code).
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Move FPU status to EFLAGS and skip the next 5 bytes if parity is clear
  // (i.e. the compare was ordered).
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  // One FPREM iteration plus the loop-back test (FPREM must be iterated
  // until the C2 status flag clears).
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp  ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  // Copy FPU condition codes into EFLAGS, mapping the unordered (NaN)
  // case to the LT case by forcing the carry flag.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

  // fnstsw_ax();
  // sahf();
  // movl(dst, nan_result);
  // jcc(Assembler::parity, exit);
  // movl(dst, less_result);
  // jcc(Assembler::below, exit);
  // movl(dst, equal_result);
  // jcc(Assembler::equal, exit);
  // movl(dst, greater_result);

  // less_result     =  1;
  // greater_result  = -1;
  // equal_result    =  0;
  // nan_result      = -1;

  // Materialize the three-way float compare result (-1/0/1, NaN -> -1)
  // into an integer register, following the pseudo-code above.
  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8    ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}


  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}

  // Sign-extend a 32-bit int into a long register pair.
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding  , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}

  // Push the long onto the CPU stack, FILD it into the FPU, and pop the
  // two temporary CPU-stack words.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add  SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}

  // EDX:EAX = EAX * src1, then shift the high half right by (cnt - 32).
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL   EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR    EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}

  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}

  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
  %}

  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}

  // Push both long operands and call SharedRuntime::ldiv; the callee
  // leaves the result in EDX:EAX and the caller pops the 16 arg bytes.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Same calling sequence as long_div, but targets SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Set flags for (src == 0) by OR-ing the two halves into tmp.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
  %}

  // Equality-only long compare: compare low halves, and only if they are
  // equal fall through to compare the high halves.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
  %}

  // Signed long compare via CMP-low / SBB-high; clobbers tmp.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
    // SBB    $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
  %}

  // Long compare against zero: 0 - src via XOR/CMP/SBB; clobbers tmp.
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
  %}

  // Sniff, sniff... smells like Gnu Superoptimizer
  // Two's-complement negate of a long register pair without a branch.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3,  $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  // POP EDX (discard one stack word into RDX).
  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);
  %}

  // Tail-jump to the rethrow stub.
  enc_class enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // thrown an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Carry on here...
  %}

  // Double-to-long version of DPR2I_encoding: 8-byte slot, 64-bit FISTP,
  // and the sentinel check covers both halves (EDX == 0x80000000, EAX == 0).
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x5A);       // POP EDX
    emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);     // Size of slow_call
    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Carry on here...
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADDP  ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        | locks  |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by      +--------+
//     CALLEE      | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immU8() %{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// nullptr Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus-one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value fits in a signed 32-bit int
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K1()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K1));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K2()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K2));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K3()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K3));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K4()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K4));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K5()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K5));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K6()
%{
constraint(ALLOC_IN_RC(vectmask_reg_K6)); 3658 match(RegVectMask); 3659 format %{%} 3660 interface(REG_INTER); 3661 %} 3662 3663 // Special Registers 3664 operand kReg_K7() 3665 %{ 3666 constraint(ALLOC_IN_RC(vectmask_reg_K7)); 3667 match(RegVectMask); 3668 format %{%} 3669 interface(REG_INTER); 3670 %} 3671 3672 // Register Operands 3673 // Integer Register 3674 operand rRegI() %{ 3675 constraint(ALLOC_IN_RC(int_reg)); 3676 match(RegI); 3677 match(xRegI); 3678 match(eAXRegI); 3679 match(eBXRegI); 3680 match(eCXRegI); 3681 match(eDXRegI); 3682 match(eDIRegI); 3683 match(eSIRegI); 3684 3685 format %{ %} 3686 interface(REG_INTER); 3687 %} 3688 3689 // Subset of Integer Register 3690 operand xRegI(rRegI reg) %{ 3691 constraint(ALLOC_IN_RC(int_x_reg)); 3692 match(reg); 3693 match(eAXRegI); 3694 match(eBXRegI); 3695 match(eCXRegI); 3696 match(eDXRegI); 3697 3698 format %{ %} 3699 interface(REG_INTER); 3700 %} 3701 3702 // Special Registers 3703 operand eAXRegI(xRegI reg) %{ 3704 constraint(ALLOC_IN_RC(eax_reg)); 3705 match(reg); 3706 match(rRegI); 3707 3708 format %{ "EAX" %} 3709 interface(REG_INTER); 3710 %} 3711 3712 // Special Registers 3713 operand eBXRegI(xRegI reg) %{ 3714 constraint(ALLOC_IN_RC(ebx_reg)); 3715 match(reg); 3716 match(rRegI); 3717 3718 format %{ "EBX" %} 3719 interface(REG_INTER); 3720 %} 3721 3722 operand eCXRegI(xRegI reg) %{ 3723 constraint(ALLOC_IN_RC(ecx_reg)); 3724 match(reg); 3725 match(rRegI); 3726 3727 format %{ "ECX" %} 3728 interface(REG_INTER); 3729 %} 3730 3731 operand eDXRegI(xRegI reg) %{ 3732 constraint(ALLOC_IN_RC(edx_reg)); 3733 match(reg); 3734 match(rRegI); 3735 3736 format %{ "EDX" %} 3737 interface(REG_INTER); 3738 %} 3739 3740 operand eDIRegI(xRegI reg) %{ 3741 constraint(ALLOC_IN_RC(edi_reg)); 3742 match(reg); 3743 match(rRegI); 3744 3745 format %{ "EDI" %} 3746 interface(REG_INTER); 3747 %} 3748 3749 operand naxRegI() %{ 3750 constraint(ALLOC_IN_RC(nax_reg)); 3751 match(RegI); 3752 match(eCXRegI); 3753 match(eDXRegI); 3754 
match(eSIRegI); 3755 match(eDIRegI); 3756 3757 format %{ %} 3758 interface(REG_INTER); 3759 %} 3760 3761 operand nadxRegI() %{ 3762 constraint(ALLOC_IN_RC(nadx_reg)); 3763 match(RegI); 3764 match(eBXRegI); 3765 match(eCXRegI); 3766 match(eSIRegI); 3767 match(eDIRegI); 3768 3769 format %{ %} 3770 interface(REG_INTER); 3771 %} 3772 3773 operand ncxRegI() %{ 3774 constraint(ALLOC_IN_RC(ncx_reg)); 3775 match(RegI); 3776 match(eAXRegI); 3777 match(eDXRegI); 3778 match(eSIRegI); 3779 match(eDIRegI); 3780 3781 format %{ %} 3782 interface(REG_INTER); 3783 %} 3784 3785 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3786 // // 3787 operand eSIRegI(xRegI reg) %{ 3788 constraint(ALLOC_IN_RC(esi_reg)); 3789 match(reg); 3790 match(rRegI); 3791 3792 format %{ "ESI" %} 3793 interface(REG_INTER); 3794 %} 3795 3796 // Pointer Register 3797 operand anyRegP() %{ 3798 constraint(ALLOC_IN_RC(any_reg)); 3799 match(RegP); 3800 match(eAXRegP); 3801 match(eBXRegP); 3802 match(eCXRegP); 3803 match(eDIRegP); 3804 match(eRegP); 3805 3806 format %{ %} 3807 interface(REG_INTER); 3808 %} 3809 3810 operand eRegP() %{ 3811 constraint(ALLOC_IN_RC(int_reg)); 3812 match(RegP); 3813 match(eAXRegP); 3814 match(eBXRegP); 3815 match(eCXRegP); 3816 match(eDIRegP); 3817 3818 format %{ %} 3819 interface(REG_INTER); 3820 %} 3821 3822 operand rRegP() %{ 3823 constraint(ALLOC_IN_RC(int_reg)); 3824 match(RegP); 3825 match(eAXRegP); 3826 match(eBXRegP); 3827 match(eCXRegP); 3828 match(eDIRegP); 3829 3830 format %{ %} 3831 interface(REG_INTER); 3832 %} 3833 3834 // On windows95, EBP is not safe to use for implicit null tests. 
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX and EBX
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Long register pair
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

operand eBDPRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebpd_reg));
  match(reg);

  format %{ "EBP:EDI" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand rFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}

operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}

operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}

operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}

operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

operand legRegF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand vlRegF() %{
  constraint(ALLOC_IN_RC(float_reg_vl));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand legRegD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

operand vlRegD() %{
  constraint(ALLOC_IN_RC(double_reg_vl));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.
// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable-size x86 instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.
// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR
dst) %{
  instruction_count(2);
  dst    : S3(read);
  DECODE : S0(2);     // any 2 decoders
  FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
  instruction_count(2);
  dst    : S4(write);
  src    : S3(read);
  DECODE : S0(2);     // any 2 decoders
  FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
  instruction_count(3);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  DECODE : S0(3);     // any 3 decoders
  FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  src3   : S3(read);
  DECODE : S0(4);     // any 4 decoders (comment fixed: reservation is S0(4), was mislabeled "any 3")
  FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  src3   : S3(read);
  DECODE : S1(3);     // any 3 decoders
  D0     : S0;        // Big decoder only
  FPU    : S3(2);
  MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
  instruction_count(2);
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0;        // big decoder only
  DECODE : S1;        // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
  instruction_count(3);
  dst    : S5(write);
  src1   : S3(read);
  mem    : S3(read);
  D0     : S0;        // big decoder only
  DECODE : S1(2);     // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2); 4914 src : S5(read); 4915 mem : S3(read); 4916 DECODE : S0; // any decoder for FPU PUSH 4917 D0 : S1; // big decoder only 4918 FPU : S4; 4919 MEM : S3; // any mem 4920 %} 4921 4922 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4923 instruction_count(3); 4924 src1 : S3(read); 4925 src2 : S3(read); 4926 mem : S3(read); 4927 DECODE : S0(2); // any decoder for FPU PUSH 4928 D0 : S1; // big decoder only 4929 FPU : S4; 4930 MEM : S3; // any mem 4931 %} 4932 4933 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4934 instruction_count(3); 4935 src1 : S3(read); 4936 src2 : S3(read); 4937 mem : S4(read); 4938 DECODE : S0; // any decoder for FPU PUSH 4939 D0 : S0(2); // big decoder only 4940 FPU : S4; 4941 MEM : S3(2); // any mem 4942 %} 4943 4944 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4945 instruction_count(2); 4946 src1 : S3(read); 4947 dst : S4(read); 4948 D0 : S0(2); // big decoder only 4949 MEM : S3(2); // any mem 4950 %} 4951 4952 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4953 instruction_count(3); 4954 src1 : S3(read); 4955 src2 : S3(read); 4956 dst : S4(read); 4957 D0 : S0(3); // big decoder only 4958 FPU : S4; 4959 MEM : S3(3); // any mem 4960 %} 4961 4962 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4963 instruction_count(3); 4964 src1 : S4(read); 4965 mem : S4(read); 4966 DECODE : S0; // any decoder for FPU PUSH 4967 D0 : S0(2); // big decoder only 4968 FPU : S4; 4969 MEM : S3(2); // any mem 4970 %} 4971 4972 // Float load constant 4973 pipe_class fpu_reg_con(regDPR dst) %{ 4974 instruction_count(2); 4975 dst : S5(write); 4976 D0 : S0; // big decoder only for the load 4977 DECODE : S1; // any decoder for FPU POP 4978 FPU : S4; 4979 MEM : S3; // any mem 4980 %} 4981 4982 // Float load constant 4983 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 4984 instruction_count(3); 4985 dst : S5(write); 4986 src : S3(read); 4987 D0 : S0; // big decoder only for 
the load 4988 DECODE : S1(2); // any decoder for FPU POP 4989 FPU : S4; 4990 MEM : S3; // any mem 4991 %} 4992 4993 // UnConditional branch 4994 pipe_class pipe_jmp( label labl ) %{ 4995 single_instruction; 4996 BR : S3; 4997 %} 4998 4999 // Conditional branch 5000 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5001 single_instruction; 5002 cr : S1(read); 5003 BR : S3; 5004 %} 5005 5006 // Allocation idiom 5007 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5008 instruction_count(1); force_serialization; 5009 fixed_latency(6); 5010 heap_ptr : S3(read); 5011 DECODE : S0(3); 5012 D0 : S2; 5013 MEM : S3; 5014 ALU : S3(2); 5015 dst : S5(write); 5016 BR : S5; 5017 %} 5018 5019 // Generic big/slow expanded idiom 5020 pipe_class pipe_slow( ) %{ 5021 instruction_count(10); multiple_bundles; force_serialization; 5022 fixed_latency(100); 5023 D0 : S0(2); 5024 MEM : S3(2); 5025 %} 5026 5027 // The real do-nothing guy 5028 pipe_class empty( ) %{ 5029 instruction_count(0); 5030 %} 5031 5032 // Define the class for the Nop node 5033 define %{ 5034 MachNop = empty; 5035 %} 5036 5037 %} 5038 5039 //----------INSTRUCTIONS------------------------------------------------------- 5040 // 5041 // match -- States which machine-independent subtree may be replaced 5042 // by this instruction. 5043 // ins_cost -- The estimated cost of this instruction is used by instruction 5044 // selection to identify a minimum cost tree of machine 5045 // instructions that matches a tree of machine-independent 5046 // instructions. 5047 // format -- A string providing the disassembly for this instruction. 5048 // The value of an instruction's operand may be inserted 5049 // by referring to it with a '$' prefix. 5050 // opcode -- Three instruction opcodes may be provided. These are referred 5051 // to within an encode class as $primary, $secondary, and $tertiary 5052 // respectively. 
The primary opcode is commonly used to 5053 // indicate the type of machine instruction, while secondary 5054 // and tertiary are often used for prefix options or addressing 5055 // modes. 5056 // ins_encode -- A list of encode classes with parameters. The encode class 5057 // name must have been defined in an 'enc_class' specification 5058 // in the encode section of the architecture description. 5059 5060 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 5061 // Load Float 5062 instruct MoveF2LEG(legRegF dst, regF src) %{ 5063 match(Set dst src); 5064 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} 5065 ins_encode %{ 5066 ShouldNotReachHere(); 5067 %} 5068 ins_pipe( fpu_reg_reg ); 5069 %} 5070 5071 // Load Float 5072 instruct MoveLEG2F(regF dst, legRegF src) %{ 5073 match(Set dst src); 5074 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} 5075 ins_encode %{ 5076 ShouldNotReachHere(); 5077 %} 5078 ins_pipe( fpu_reg_reg ); 5079 %} 5080 5081 // Load Float 5082 instruct MoveF2VL(vlRegF dst, regF src) %{ 5083 match(Set dst src); 5084 format %{ "movss $dst,$src\t! load float (4 bytes)" %} 5085 ins_encode %{ 5086 ShouldNotReachHere(); 5087 %} 5088 ins_pipe( fpu_reg_reg ); 5089 %} 5090 5091 // Load Float 5092 instruct MoveVL2F(regF dst, vlRegF src) %{ 5093 match(Set dst src); 5094 format %{ "movss $dst,$src\t! 
load float (4 bytes)" %} 5095 ins_encode %{ 5096 ShouldNotReachHere(); 5097 %} 5098 ins_pipe( fpu_reg_reg ); 5099 %} 5100 5101 5102 5103 // Load Double 5104 instruct MoveD2LEG(legRegD dst, regD src) %{ 5105 match(Set dst src); 5106 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} 5107 ins_encode %{ 5108 ShouldNotReachHere(); 5109 %} 5110 ins_pipe( fpu_reg_reg ); 5111 %} 5112 5113 // Load Double 5114 instruct MoveLEG2D(regD dst, legRegD src) %{ 5115 match(Set dst src); 5116 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} 5117 ins_encode %{ 5118 ShouldNotReachHere(); 5119 %} 5120 ins_pipe( fpu_reg_reg ); 5121 %} 5122 5123 // Load Double 5124 instruct MoveD2VL(vlRegD dst, regD src) %{ 5125 match(Set dst src); 5126 format %{ "movsd $dst,$src\t! load double (8 bytes)" %} 5127 ins_encode %{ 5128 ShouldNotReachHere(); 5129 %} 5130 ins_pipe( fpu_reg_reg ); 5131 %} 5132 5133 // Load Double 5134 instruct MoveVL2D(regD dst, vlRegD src) %{ 5135 match(Set dst src); 5136 format %{ "movsd $dst,$src\t! 
load double (8 bytes)" %} 5137 ins_encode %{ 5138 ShouldNotReachHere(); 5139 %} 5140 ins_pipe( fpu_reg_reg ); 5141 %} 5142 5143 //----------BSWAP-Instruction-------------------------------------------------- 5144 instruct bytes_reverse_int(rRegI dst) %{ 5145 match(Set dst (ReverseBytesI dst)); 5146 5147 format %{ "BSWAP $dst" %} 5148 opcode(0x0F, 0xC8); 5149 ins_encode( OpcP, OpcSReg(dst) ); 5150 ins_pipe( ialu_reg ); 5151 %} 5152 5153 instruct bytes_reverse_long(eRegL dst) %{ 5154 match(Set dst (ReverseBytesL dst)); 5155 5156 format %{ "BSWAP $dst.lo\n\t" 5157 "BSWAP $dst.hi\n\t" 5158 "XCHG $dst.lo $dst.hi" %} 5159 5160 ins_cost(125); 5161 ins_encode( bswap_long_bytes(dst) ); 5162 ins_pipe( ialu_reg_reg); 5163 %} 5164 5165 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5166 match(Set dst (ReverseBytesUS dst)); 5167 effect(KILL cr); 5168 5169 format %{ "BSWAP $dst\n\t" 5170 "SHR $dst,16\n\t" %} 5171 ins_encode %{ 5172 __ bswapl($dst$$Register); 5173 __ shrl($dst$$Register, 16); 5174 %} 5175 ins_pipe( ialu_reg ); 5176 %} 5177 5178 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5179 match(Set dst (ReverseBytesS dst)); 5180 effect(KILL cr); 5181 5182 format %{ "BSWAP $dst\n\t" 5183 "SAR $dst,16\n\t" %} 5184 ins_encode %{ 5185 __ bswapl($dst$$Register); 5186 __ sarl($dst$$Register, 16); 5187 %} 5188 ins_pipe( ialu_reg ); 5189 %} 5190 5191 5192 //---------- Zeros Count Instructions ------------------------------------------ 5193 5194 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5195 predicate(UseCountLeadingZerosInstruction); 5196 match(Set dst (CountLeadingZerosI src)); 5197 effect(KILL cr); 5198 5199 format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %} 5200 ins_encode %{ 5201 __ lzcntl($dst$$Register, $src$$Register); 5202 %} 5203 ins_pipe(ialu_reg); 5204 %} 5205 5206 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5207 predicate(!UseCountLeadingZerosInstruction); 5208 match(Set dst 
(CountLeadingZerosI src)); 5209 effect(KILL cr); 5210 5211 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5212 "JNZ skip\n\t" 5213 "MOV $dst, -1\n" 5214 "skip:\n\t" 5215 "NEG $dst\n\t" 5216 "ADD $dst, 31" %} 5217 ins_encode %{ 5218 Register Rdst = $dst$$Register; 5219 Register Rsrc = $src$$Register; 5220 Label skip; 5221 __ bsrl(Rdst, Rsrc); 5222 __ jccb(Assembler::notZero, skip); 5223 __ movl(Rdst, -1); 5224 __ bind(skip); 5225 __ negl(Rdst); 5226 __ addl(Rdst, BitsPerInt - 1); 5227 %} 5228 ins_pipe(ialu_reg); 5229 %} 5230 5231 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5232 predicate(UseCountLeadingZerosInstruction); 5233 match(Set dst (CountLeadingZerosL src)); 5234 effect(TEMP dst, KILL cr); 5235 5236 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5237 "JNC done\n\t" 5238 "LZCNT $dst, $src.lo\n\t" 5239 "ADD $dst, 32\n" 5240 "done:" %} 5241 ins_encode %{ 5242 Register Rdst = $dst$$Register; 5243 Register Rsrc = $src$$Register; 5244 Label done; 5245 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5246 __ jccb(Assembler::carryClear, done); 5247 __ lzcntl(Rdst, Rsrc); 5248 __ addl(Rdst, BitsPerInt); 5249 __ bind(done); 5250 %} 5251 ins_pipe(ialu_reg); 5252 %} 5253 5254 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5255 predicate(!UseCountLeadingZerosInstruction); 5256 match(Set dst (CountLeadingZerosL src)); 5257 effect(TEMP dst, KILL cr); 5258 5259 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5260 "JZ msw_is_zero\n\t" 5261 "ADD $dst, 32\n\t" 5262 "JMP not_zero\n" 5263 "msw_is_zero:\n\t" 5264 "BSR $dst, $src.lo\n\t" 5265 "JNZ not_zero\n\t" 5266 "MOV $dst, -1\n" 5267 "not_zero:\n\t" 5268 "NEG $dst\n\t" 5269 "ADD $dst, 63\n" %} 5270 ins_encode %{ 5271 Register Rdst = $dst$$Register; 5272 Register Rsrc = $src$$Register; 5273 Label msw_is_zero; 5274 Label not_zero; 5275 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5276 __ jccb(Assembler::zero, msw_is_zero); 5277 __ addl(Rdst, BitsPerInt); 
5278 __ jmpb(not_zero); 5279 __ bind(msw_is_zero); 5280 __ bsrl(Rdst, Rsrc); 5281 __ jccb(Assembler::notZero, not_zero); 5282 __ movl(Rdst, -1); 5283 __ bind(not_zero); 5284 __ negl(Rdst); 5285 __ addl(Rdst, BitsPerLong - 1); 5286 %} 5287 ins_pipe(ialu_reg); 5288 %} 5289 5290 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5291 predicate(UseCountTrailingZerosInstruction); 5292 match(Set dst (CountTrailingZerosI src)); 5293 effect(KILL cr); 5294 5295 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5296 ins_encode %{ 5297 __ tzcntl($dst$$Register, $src$$Register); 5298 %} 5299 ins_pipe(ialu_reg); 5300 %} 5301 5302 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5303 predicate(!UseCountTrailingZerosInstruction); 5304 match(Set dst (CountTrailingZerosI src)); 5305 effect(KILL cr); 5306 5307 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5308 "JNZ done\n\t" 5309 "MOV $dst, 32\n" 5310 "done:" %} 5311 ins_encode %{ 5312 Register Rdst = $dst$$Register; 5313 Label done; 5314 __ bsfl(Rdst, $src$$Register); 5315 __ jccb(Assembler::notZero, done); 5316 __ movl(Rdst, BitsPerInt); 5317 __ bind(done); 5318 %} 5319 ins_pipe(ialu_reg); 5320 %} 5321 5322 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5323 predicate(UseCountTrailingZerosInstruction); 5324 match(Set dst (CountTrailingZerosL src)); 5325 effect(TEMP dst, KILL cr); 5326 5327 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5328 "JNC done\n\t" 5329 "TZCNT $dst, $src.hi\n\t" 5330 "ADD $dst, 32\n" 5331 "done:" %} 5332 ins_encode %{ 5333 Register Rdst = $dst$$Register; 5334 Register Rsrc = $src$$Register; 5335 Label done; 5336 __ tzcntl(Rdst, Rsrc); 5337 __ jccb(Assembler::carryClear, done); 5338 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5339 __ addl(Rdst, BitsPerInt); 5340 __ bind(done); 5341 %} 5342 ins_pipe(ialu_reg); 5343 %} 5344 5345 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5346 
predicate(!UseCountTrailingZerosInstruction); 5347 match(Set dst (CountTrailingZerosL src)); 5348 effect(TEMP dst, KILL cr); 5349 5350 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5351 "JNZ done\n\t" 5352 "BSF $dst, $src.hi\n\t" 5353 "JNZ msw_not_zero\n\t" 5354 "MOV $dst, 32\n" 5355 "msw_not_zero:\n\t" 5356 "ADD $dst, 32\n" 5357 "done:" %} 5358 ins_encode %{ 5359 Register Rdst = $dst$$Register; 5360 Register Rsrc = $src$$Register; 5361 Label msw_not_zero; 5362 Label done; 5363 __ bsfl(Rdst, Rsrc); 5364 __ jccb(Assembler::notZero, done); 5365 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5366 __ jccb(Assembler::notZero, msw_not_zero); 5367 __ movl(Rdst, BitsPerInt); 5368 __ bind(msw_not_zero); 5369 __ addl(Rdst, BitsPerInt); 5370 __ bind(done); 5371 %} 5372 ins_pipe(ialu_reg); 5373 %} 5374 5375 5376 //---------- Population Count Instructions ------------------------------------- 5377 5378 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5379 predicate(UsePopCountInstruction); 5380 match(Set dst (PopCountI src)); 5381 effect(KILL cr); 5382 5383 format %{ "POPCNT $dst, $src" %} 5384 ins_encode %{ 5385 __ popcntl($dst$$Register, $src$$Register); 5386 %} 5387 ins_pipe(ialu_reg); 5388 %} 5389 5390 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5391 predicate(UsePopCountInstruction); 5392 match(Set dst (PopCountI (LoadI mem))); 5393 effect(KILL cr); 5394 5395 format %{ "POPCNT $dst, $mem" %} 5396 ins_encode %{ 5397 __ popcntl($dst$$Register, $mem$$Address); 5398 %} 5399 ins_pipe(ialu_reg); 5400 %} 5401 5402 // Note: Long.bitCount(long) returns an int. 
5403 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 5404 predicate(UsePopCountInstruction); 5405 match(Set dst (PopCountL src)); 5406 effect(KILL cr, TEMP tmp, TEMP dst); 5407 5408 format %{ "POPCNT $dst, $src.lo\n\t" 5409 "POPCNT $tmp, $src.hi\n\t" 5410 "ADD $dst, $tmp" %} 5411 ins_encode %{ 5412 __ popcntl($dst$$Register, $src$$Register); 5413 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 5414 __ addl($dst$$Register, $tmp$$Register); 5415 %} 5416 ins_pipe(ialu_reg); 5417 %} 5418 5419 // Note: Long.bitCount(long) returns an int. 5420 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ 5421 predicate(UsePopCountInstruction); 5422 match(Set dst (PopCountL (LoadL mem))); 5423 effect(KILL cr, TEMP tmp, TEMP dst); 5424 5425 format %{ "POPCNT $dst, $mem\n\t" 5426 "POPCNT $tmp, $mem+4\n\t" 5427 "ADD $dst, $tmp" %} 5428 ins_encode %{ 5429 //__ popcntl($dst$$Register, $mem$$Address$$first); 5430 //__ popcntl($tmp$$Register, $mem$$Address$$second); 5431 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); 5432 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); 5433 __ addl($dst$$Register, $tmp$$Register); 5434 %} 5435 ins_pipe(ialu_reg); 5436 %} 5437 5438 5439 //----------Load/Store/Move Instructions--------------------------------------- 5440 //----------Load Instructions-------------------------------------------------- 5441 // Load Byte (8bit signed) 5442 instruct loadB(xRegI dst, memory mem) %{ 5443 match(Set dst (LoadB mem)); 5444 5445 ins_cost(125); 5446 format %{ "MOVSX8 $dst,$mem\t# byte" %} 5447 5448 ins_encode %{ 5449 __ movsbl($dst$$Register, $mem$$Address); 5450 %} 5451 5452 ins_pipe(ialu_reg_mem); 5453 %} 5454 5455 // Load Byte (8bit signed) into Long Register 5456 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5457 match(Set dst (ConvI2L (LoadB mem))); 5458 effect(KILL 
cr); 5459 5460 ins_cost(375); 5461 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" 5462 "MOV $dst.hi,$dst.lo\n\t" 5463 "SAR $dst.hi,7" %} 5464 5465 ins_encode %{ 5466 __ movsbl($dst$$Register, $mem$$Address); 5467 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5468 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. 5469 %} 5470 5471 ins_pipe(ialu_reg_mem); 5472 %} 5473 5474 // Load Unsigned Byte (8bit UNsigned) 5475 instruct loadUB(xRegI dst, memory mem) %{ 5476 match(Set dst (LoadUB mem)); 5477 5478 ins_cost(125); 5479 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} 5480 5481 ins_encode %{ 5482 __ movzbl($dst$$Register, $mem$$Address); 5483 %} 5484 5485 ins_pipe(ialu_reg_mem); 5486 %} 5487 5488 // Load Unsigned Byte (8 bit UNsigned) into Long Register 5489 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5490 match(Set dst (ConvI2L (LoadUB mem))); 5491 effect(KILL cr); 5492 5493 ins_cost(250); 5494 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" 5495 "XOR $dst.hi,$dst.hi" %} 5496 5497 ins_encode %{ 5498 Register Rdst = $dst$$Register; 5499 __ movzbl(Rdst, $mem$$Address); 5500 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5501 %} 5502 5503 ins_pipe(ialu_reg_mem); 5504 %} 5505 5506 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register 5507 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5508 match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); 5509 effect(KILL cr); 5510 5511 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" 5512 "XOR $dst.hi,$dst.hi\n\t" 5513 "AND $dst.lo,right_n_bits($mask, 8)" %} 5514 ins_encode %{ 5515 Register Rdst = $dst$$Register; 5516 __ movzbl(Rdst, $mem$$Address); 5517 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5518 __ andl(Rdst, $mask$$constant & right_n_bits(8)); 5519 %} 5520 ins_pipe(ialu_reg_mem); 5521 %} 5522 5523 // Load Short (16bit signed) 5524 instruct loadS(rRegI 
dst, memory mem) %{ 5525 match(Set dst (LoadS mem)); 5526 5527 ins_cost(125); 5528 format %{ "MOVSX $dst,$mem\t# short" %} 5529 5530 ins_encode %{ 5531 __ movswl($dst$$Register, $mem$$Address); 5532 %} 5533 5534 ins_pipe(ialu_reg_mem); 5535 %} 5536 5537 // Load Short (16 bit signed) to Byte (8 bit signed) 5538 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5539 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5540 5541 ins_cost(125); 5542 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5543 ins_encode %{ 5544 __ movsbl($dst$$Register, $mem$$Address); 5545 %} 5546 ins_pipe(ialu_reg_mem); 5547 %} 5548 5549 // Load Short (16bit signed) into Long Register 5550 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5551 match(Set dst (ConvI2L (LoadS mem))); 5552 effect(KILL cr); 5553 5554 ins_cost(375); 5555 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5556 "MOV $dst.hi,$dst.lo\n\t" 5557 "SAR $dst.hi,15" %} 5558 5559 ins_encode %{ 5560 __ movswl($dst$$Register, $mem$$Address); 5561 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5562 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 
5563 %} 5564 5565 ins_pipe(ialu_reg_mem); 5566 %} 5567 5568 // Load Unsigned Short/Char (16bit unsigned) 5569 instruct loadUS(rRegI dst, memory mem) %{ 5570 match(Set dst (LoadUS mem)); 5571 5572 ins_cost(125); 5573 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5574 5575 ins_encode %{ 5576 __ movzwl($dst$$Register, $mem$$Address); 5577 %} 5578 5579 ins_pipe(ialu_reg_mem); 5580 %} 5581 5582 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5583 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5584 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5585 5586 ins_cost(125); 5587 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5588 ins_encode %{ 5589 __ movsbl($dst$$Register, $mem$$Address); 5590 %} 5591 ins_pipe(ialu_reg_mem); 5592 %} 5593 5594 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5595 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5596 match(Set dst (ConvI2L (LoadUS mem))); 5597 effect(KILL cr); 5598 5599 ins_cost(250); 5600 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5601 "XOR $dst.hi,$dst.hi" %} 5602 5603 ins_encode %{ 5604 __ movzwl($dst$$Register, $mem$$Address); 5605 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5606 %} 5607 5608 ins_pipe(ialu_reg_mem); 5609 %} 5610 5611 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5612 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5613 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5614 effect(KILL cr); 5615 5616 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5617 "XOR $dst.hi,$dst.hi" %} 5618 ins_encode %{ 5619 Register Rdst = $dst$$Register; 5620 __ movzbl(Rdst, $mem$$Address); 5621 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5622 %} 5623 ins_pipe(ialu_reg_mem); 5624 %} 5625 5626 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5627 instruct 
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5628 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5629 effect(KILL cr); 5630 5631 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5632 "XOR $dst.hi,$dst.hi\n\t" 5633 "AND $dst.lo,right_n_bits($mask, 16)" %} 5634 ins_encode %{ 5635 Register Rdst = $dst$$Register; 5636 __ movzwl(Rdst, $mem$$Address); 5637 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5638 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5639 %} 5640 ins_pipe(ialu_reg_mem); 5641 %} 5642 5643 // Load Integer 5644 instruct loadI(rRegI dst, memory mem) %{ 5645 match(Set dst (LoadI mem)); 5646 5647 ins_cost(125); 5648 format %{ "MOV $dst,$mem\t# int" %} 5649 5650 ins_encode %{ 5651 __ movl($dst$$Register, $mem$$Address); 5652 %} 5653 5654 ins_pipe(ialu_reg_mem); 5655 %} 5656 5657 // Load Integer (32 bit signed) to Byte (8 bit signed) 5658 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5659 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5660 5661 ins_cost(125); 5662 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5663 ins_encode %{ 5664 __ movsbl($dst$$Register, $mem$$Address); 5665 %} 5666 ins_pipe(ialu_reg_mem); 5667 %} 5668 5669 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5670 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5671 match(Set dst (AndI (LoadI mem) mask)); 5672 5673 ins_cost(125); 5674 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5675 ins_encode %{ 5676 __ movzbl($dst$$Register, $mem$$Address); 5677 %} 5678 ins_pipe(ialu_reg_mem); 5679 %} 5680 5681 // Load Integer (32 bit signed) to Short (16 bit signed) 5682 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5683 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5684 5685 ins_cost(125); 5686 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5687 ins_encode %{ 5688 __ movswl($dst$$Register, $mem$$Address); 5689 %} 5690 ins_pipe(ialu_reg_mem); 5691 
%} 5692 5693 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5694 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5695 match(Set dst (AndI (LoadI mem) mask)); 5696 5697 ins_cost(125); 5698 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5699 ins_encode %{ 5700 __ movzwl($dst$$Register, $mem$$Address); 5701 %} 5702 ins_pipe(ialu_reg_mem); 5703 %} 5704 5705 // Load Integer into Long Register 5706 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5707 match(Set dst (ConvI2L (LoadI mem))); 5708 effect(KILL cr); 5709 5710 ins_cost(375); 5711 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5712 "MOV $dst.hi,$dst.lo\n\t" 5713 "SAR $dst.hi,31" %} 5714 5715 ins_encode %{ 5716 __ movl($dst$$Register, $mem$$Address); 5717 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5718 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5719 %} 5720 5721 ins_pipe(ialu_reg_mem); 5722 %} 5723 5724 // Load Integer with mask 0xFF into Long Register 5725 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5726 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5727 effect(KILL cr); 5728 5729 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5730 "XOR $dst.hi,$dst.hi" %} 5731 ins_encode %{ 5732 Register Rdst = $dst$$Register; 5733 __ movzbl(Rdst, $mem$$Address); 5734 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5735 %} 5736 ins_pipe(ialu_reg_mem); 5737 %} 5738 5739 // Load Integer with mask 0xFFFF into Long Register 5740 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5741 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5742 effect(KILL cr); 5743 5744 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5745 "XOR $dst.hi,$dst.hi" %} 5746 ins_encode %{ 5747 Register Rdst = $dst$$Register; 5748 __ movzwl(Rdst, $mem$$Address); 5749 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5750 %} 5751 ins_pipe(ialu_reg_mem); 
5752 %} 5753 5754 // Load Integer with 31-bit mask into Long Register 5755 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{ 5756 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5757 effect(KILL cr); 5758 5759 format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t" 5760 "XOR $dst.hi,$dst.hi\n\t" 5761 "AND $dst.lo,$mask" %} 5762 ins_encode %{ 5763 Register Rdst = $dst$$Register; 5764 __ movl(Rdst, $mem$$Address); 5765 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5766 __ andl(Rdst, $mask$$constant); 5767 %} 5768 ins_pipe(ialu_reg_mem); 5769 %} 5770 5771 // Load Unsigned Integer into Long Register 5772 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{ 5773 match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); 5774 effect(KILL cr); 5775 5776 ins_cost(250); 5777 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" 5778 "XOR $dst.hi,$dst.hi" %} 5779 5780 ins_encode %{ 5781 __ movl($dst$$Register, $mem$$Address); 5782 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5783 %} 5784 5785 ins_pipe(ialu_reg_mem); 5786 %} 5787 5788 // Load Long. Cannot clobber address while loading, so restrict address 5789 // register to ESI 5790 instruct loadL(eRegL dst, load_long_memory mem) %{ 5791 predicate(!((LoadLNode*)n)->require_atomic_access()); 5792 match(Set dst (LoadL mem)); 5793 5794 ins_cost(250); 5795 format %{ "MOV $dst.lo,$mem\t# long\n\t" 5796 "MOV $dst.hi,$mem+4" %} 5797 5798 ins_encode %{ 5799 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); 5800 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); 5801 __ movl($dst$$Register, Amemlo); 5802 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); 5803 %} 5804 5805 ins_pipe(ialu_reg_long_mem); 5806 %} 5807 5808 // Volatile Load Long. Must be atomic, so do 64-bit FILD 5809 // then store it down to the stack and reload on the int 5810 // side. 
// Atomic 64-bit load via the x87 FPU when SSE2 is unavailable: one FILD is
// a single 64-bit memory access, then FISTp spills it to the stack slot.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic 64-bit load via an XMM register (SSE2): a single MOVSD is atomic,
// result is parked in a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// As above, but delivers the value straight into a GPR pair: the low 32 bits
// via MOVD, then shift the XMM right by 32 and MOVD out the high half.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range (array length); plain 32-bit MOV.
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 path, no/old SSE): push onto FPU stack, pop into dst.
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Same load when UseXmmLoadAndClearUpper is off; printed as MOVLPD which
// leaves the upper half of the XMM register unchanged.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 path, SSE disabled).
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
// One LEA variant per addressing-mode operand so the matcher can pick the
// tightest encoding.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// XOR reg,reg is shorter than MOV reg,0 but clobbers flags (KILL cr).
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);  /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

// Load pointer constant.
instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);  /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load 64-bit constant as two 32-bit immediate moves.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load long zero: XOR both halves (cheaper than two immediate moves).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
// Float constant from the constant table onto the x87 stack, popped to dst.
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
// Float constant from the constant table into an XMM register.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// Float 0.0: XORPS the register with itself, no memory access needed.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
// Double constant from the constant table via the x87 stack.
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// Double constant from the constant table into an XMM register.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
// Double 0.0: XORPD the register with itself, no memory access needed.
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Long from a stack slot: two 32-bit loads, low then high word.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// Variant is selected by the AllocatePrefetchInstr flag (and SSE level).

instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short (0x66 operand-size prefix + 32-bit MOV opcode).
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
// Non-atomic two-instruction form; atomic stores use the volatile variants.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// Only the low half of the long is stored, so no second MOV is needed.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// Atomic long store via XMM (SSE2): the CMP probes the address for the
// implicit null check, then a single MOVSD performs the 64-bit store.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// As above, but the source is in a GPR pair: the two halves are assembled
// into one XMM register with MOVD + PUNPCKLDQ before the atomic MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87 path, src must be on top of the FPU stack: regDPR1).
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// Store Float (x87 path, SSE disabled).
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// Double->float conversion is folded into the 32-bit FST_S store.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
// Float constant stored as a raw 32-bit immediate (its IEEE bit pattern).
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot.
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors
// On x86 only the StoreLoad barrier needs an actual instruction; the other
// flavors are satisfied by the hardware memory model and emit nothing.

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// StoreLoad barrier: emitted as a locked ADD to the stack (see
// MacroAssembler::membar with Assembler::StoreLoad).
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elides the barrier when a following store already orders it
// (Matcher::post_store_load_barrier proves the barrier redundant).
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// Raw word -> pointer reinterpretation.  dst and src are constrained to the
// same register (EAX), so no code is emitted.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

// Pointer -> raw word: a plain register copy.
instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
// Pre-CMOV CPUs: emulate with a short conditional jump around a MOV.
// $cop$$cmpcode^1 inverts the condition (jump when the move must be skipped).
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned-compare flavor of the emulated cmove above.
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Native CMOVcc (0F 40+cc) when the CPU supports it.
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Carry-flag-only unsigned compare: expands to the plain unsigned cmove.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
// x87 FCMOVcc (opcode DA) on an unsigned condition; dst must be FPR0/TOS.
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6916 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ 6917 predicate(UseSSE<=1); 6918 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6919 ins_cost(200); 6920 format %{ "Jn$cop skip\n\t" 6921 "MOV $dst,$src\t# double\n" 6922 "skip:" %} 6923 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6924 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); 6925 ins_pipe( pipe_cmovDPR_reg ); 6926 %} 6927 6928 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6929 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ 6930 predicate(UseSSE==0); 6931 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6932 ins_cost(200); 6933 format %{ "Jn$cop skip\n\t" 6934 "MOV $dst,$src\t# float\n" 6935 "skip:" %} 6936 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6937 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); 6938 ins_pipe( pipe_cmovDPR_reg ); 6939 %} 6940 6941 // No CMOVE with SSE/SSE2 6942 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 6943 predicate (UseSSE>=1); 6944 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6945 ins_cost(200); 6946 format %{ "Jn$cop skip\n\t" 6947 "MOVSS $dst,$src\t# float\n" 6948 "skip:" %} 6949 ins_encode %{ 6950 Label skip; 6951 // Invert sense of branch from sense of CMOV 6952 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6953 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6954 __ bind(skip); 6955 %} 6956 ins_pipe( pipe_slow ); 6957 %} 6958 6959 // No CMOVE with SSE/SSE2 6960 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 6961 predicate (UseSSE>=2); 6962 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6963 ins_cost(200); 6964 format %{ "Jn$cop skip\n\t" 6965 "MOVSD $dst,$src\t# float\n" 6966 "skip:" %} 6967 ins_encode %{ 6968 Label skip; 6969 // Invert sense of branch from sense of CMOV 6970 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 
6971 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6972 __ bind(skip); 6973 %} 6974 ins_pipe( pipe_slow ); 6975 %} 6976 6977 // unsigned version 6978 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ 6979 predicate (UseSSE>=1); 6980 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6981 ins_cost(200); 6982 format %{ "Jn$cop skip\n\t" 6983 "MOVSS $dst,$src\t# float\n" 6984 "skip:" %} 6985 ins_encode %{ 6986 Label skip; 6987 // Invert sense of branch from sense of CMOV 6988 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6989 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6990 __ bind(skip); 6991 %} 6992 ins_pipe( pipe_slow ); 6993 %} 6994 6995 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ 6996 predicate (UseSSE>=1); 6997 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6998 ins_cost(200); 6999 expand %{ 7000 fcmovF_regU(cop, cr, dst, src); 7001 %} 7002 %} 7003 7004 // unsigned version 7005 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ 7006 predicate (UseSSE>=2); 7007 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7008 ins_cost(200); 7009 format %{ "Jn$cop skip\n\t" 7010 "MOVSD $dst,$src\t# double\n" 7011 "skip:" %} 7012 ins_encode %{ 7013 Label skip; 7014 // Invert sense of branch from sense of CMOV 7015 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 7016 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 7017 __ bind(skip); 7018 %} 7019 ins_pipe( pipe_slow ); 7020 %} 7021 7022 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ 7023 predicate (UseSSE>=2); 7024 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7025 ins_cost(200); 7026 expand %{ 7027 fcmovD_regU(cop, cr, dst, src); 7028 %} 7029 %} 7030 7031 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{ 7032 predicate(VM_Version::supports_cmov() ); 7033 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7034 ins_cost(200); 7035 format
%{ "CMOV$cop $dst.lo,$src.lo\n\t" 7036 "CMOV$cop $dst.hi,$src.hi" %} 7037 opcode(0x0F,0x40); 7038 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 7039 ins_pipe( pipe_cmov_reg_long ); 7040 %} 7041 7042 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{ 7043 predicate(VM_Version::supports_cmov() ); 7044 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7045 ins_cost(200); 7046 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 7047 "CMOV$cop $dst.hi,$src.hi" %} 7048 opcode(0x0F,0x40); 7049 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 7050 ins_pipe( pipe_cmov_reg_long ); 7051 %} 7052 7053 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{ 7054 predicate(VM_Version::supports_cmov() ); 7055 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7056 ins_cost(200); 7057 expand %{ 7058 cmovL_regU(cop, cr, dst, src); 7059 %} 7060 %} 7061 7062 //----------Arithmetic Instructions-------------------------------------------- 7063 //----------Addition Instructions---------------------------------------------- 7064 7065 // Integer Addition Instructions 7066 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7067 match(Set dst (AddI dst src)); 7068 effect(KILL cr); 7069 7070 size(2); 7071 format %{ "ADD $dst,$src" %} 7072 opcode(0x03); 7073 ins_encode( OpcP, RegReg( dst, src) ); 7074 ins_pipe( ialu_reg_reg ); 7075 %} 7076 7077 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7078 match(Set dst (AddI dst src)); 7079 effect(KILL cr); 7080 7081 format %{ "ADD $dst,$src" %} 7082 opcode(0x81, 0x00); /* /0 id */ 7083 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7084 ins_pipe( ialu_reg ); 7085 %} 7086 7087 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{ 7088 predicate(UseIncDec); 7089 match(Set dst (AddI dst src)); 7090 effect(KILL cr); 7091 7092 size(1); 7093 format %{ "INC $dst" %} 7094 opcode(0x40); /* */ 7095 
ins_encode( Opc_plus( primary, dst ) ); 7096 ins_pipe( ialu_reg ); 7097 %} 7098 7099 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ 7100 match(Set dst (AddI src0 src1)); 7101 ins_cost(110); 7102 7103 format %{ "LEA $dst,[$src0 + $src1]" %} 7104 opcode(0x8D); /* 0x8D /r */ 7105 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7106 ins_pipe( ialu_reg_reg ); 7107 %} 7108 7109 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ 7110 match(Set dst (AddP src0 src1)); 7111 ins_cost(110); 7112 7113 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} 7114 opcode(0x8D); /* 0x8D /r */ 7115 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7116 ins_pipe( ialu_reg_reg ); 7117 %} 7118 7119 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ 7120 predicate(UseIncDec); 7121 match(Set dst (AddI dst src)); 7122 effect(KILL cr); 7123 7124 size(1); 7125 format %{ "DEC $dst" %} 7126 opcode(0x48); /* */ 7127 ins_encode( Opc_plus( primary, dst ) ); 7128 ins_pipe( ialu_reg ); 7129 %} 7130 7131 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ 7132 match(Set dst (AddP dst src)); 7133 effect(KILL cr); 7134 7135 size(2); 7136 format %{ "ADD $dst,$src" %} 7137 opcode(0x03); 7138 ins_encode( OpcP, RegReg( dst, src) ); 7139 ins_pipe( ialu_reg_reg ); 7140 %} 7141 7142 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ 7143 match(Set dst (AddP dst src)); 7144 effect(KILL cr); 7145 7146 format %{ "ADD $dst,$src" %} 7147 opcode(0x81,0x00); /* Opcode 81 /0 id */ 7148 // ins_encode( RegImm( dst, src) ); 7149 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7150 ins_pipe( ialu_reg ); 7151 %} 7152 7153 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7154 match(Set dst (AddI dst (LoadI src))); 7155 effect(KILL cr); 7156 7157 ins_cost(150); 7158 format %{ "ADD $dst,$src" %} 7159 opcode(0x03); 7160 ins_encode( OpcP, RegMem( dst, src) ); 7161 ins_pipe( ialu_reg_mem ); 7162 %} 7163 7164 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 
7165 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7166 effect(KILL cr); 7167 7168 ins_cost(150); 7169 format %{ "ADD $dst,$src" %} 7170 opcode(0x01); /* Opcode 01 /r */ 7171 ins_encode( OpcP, RegMem( src, dst ) ); 7172 ins_pipe( ialu_mem_reg ); 7173 %} 7174 7175 // Add Memory with Immediate 7176 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7177 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7178 effect(KILL cr); 7179 7180 ins_cost(125); 7181 format %{ "ADD $dst,$src" %} 7182 opcode(0x81); /* Opcode 81 /0 id */ 7183 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7184 ins_pipe( ialu_mem_imm ); 7185 %} 7186 7187 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{ 7188 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7189 effect(KILL cr); 7190 7191 ins_cost(125); 7192 format %{ "INC $dst" %} 7193 opcode(0xFF); /* Opcode FF /0 */ 7194 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7195 ins_pipe( ialu_mem_imm ); 7196 %} 7197 7198 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7199 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7200 effect(KILL cr); 7201 7202 ins_cost(125); 7203 format %{ "DEC $dst" %} 7204 opcode(0xFF); /* Opcode FF /1 */ 7205 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7206 ins_pipe( ialu_mem_imm ); 7207 %} 7208 7209 7210 instruct checkCastPP( eRegP dst ) %{ 7211 match(Set dst (CheckCastPP dst)); 7212 7213 size(0); 7214 format %{ "#checkcastPP of $dst" %} 7215 ins_encode( /*empty encoding*/ ); 7216 ins_pipe( empty ); 7217 %} 7218 7219 instruct castPP( eRegP dst ) %{ 7220 match(Set dst (CastPP dst)); 7221 format %{ "#castPP of $dst" %} 7222 ins_encode( /*empty encoding*/ ); 7223 ins_pipe( empty ); 7224 %} 7225 7226 instruct castII( rRegI dst ) %{ 7227 match(Set dst (CastII dst)); 7228 format %{ "#castII of $dst" %} 7229 ins_encode( /*empty encoding*/ ); 7230 ins_cost(0); 7231 ins_pipe( empty ); 7232 %} 7233 7234 instruct castLL( eRegL dst ) %{ 7235 match(Set dst (CastLL dst)); 7236 format %{ 
"#castLL of $dst" %} 7237 ins_encode( /*empty encoding*/ ); 7238 ins_cost(0); 7239 ins_pipe( empty ); 7240 %} 7241 7242 instruct castFF( regF dst ) %{ 7243 predicate(UseSSE >= 1); 7244 match(Set dst (CastFF dst)); 7245 format %{ "#castFF of $dst" %} 7246 ins_encode( /*empty encoding*/ ); 7247 ins_cost(0); 7248 ins_pipe( empty ); 7249 %} 7250 7251 instruct castDD( regD dst ) %{ 7252 predicate(UseSSE >= 2); 7253 match(Set dst (CastDD dst)); 7254 format %{ "#castDD of $dst" %} 7255 ins_encode( /*empty encoding*/ ); 7256 ins_cost(0); 7257 ins_pipe( empty ); 7258 %} 7259 7260 instruct castFF_PR( regFPR dst ) %{ 7261 predicate(UseSSE < 1); 7262 match(Set dst (CastFF dst)); 7263 format %{ "#castFF of $dst" %} 7264 ins_encode( /*empty encoding*/ ); 7265 ins_cost(0); 7266 ins_pipe( empty ); 7267 %} 7268 7269 instruct castDD_PR( regDPR dst ) %{ 7270 predicate(UseSSE < 2); 7271 match(Set dst (CastDD dst)); 7272 format %{ "#castDD of $dst" %} 7273 ins_encode( /*empty encoding*/ ); 7274 ins_cost(0); 7275 ins_pipe( empty ); 7276 %} 7277 7278 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7279 7280 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7281 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7282 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval))); 7283 effect(KILL cr, KILL oldval); 7284 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7285 "MOV $res,0\n\t" 7286 "JNE,s fail\n\t" 7287 "MOV $res,1\n" 7288 "fail:" %} 7289 ins_encode( enc_cmpxchg8(mem_ptr), 7290 enc_flags_ne_to_boolean(res) ); 7291 ins_pipe( pipe_cmpxchg ); 7292 %} 7293 7294 instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7295 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 7296 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); 7297 effect(KILL cr, 
KILL oldval); 7298 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7299 "MOV $res,0\n\t" 7300 "JNE,s fail\n\t" 7301 "MOV $res,1\n" 7302 "fail:" %} 7303 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7304 ins_pipe( pipe_cmpxchg ); 7305 %} 7306 7307 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7308 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval))); 7309 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval))); 7310 effect(KILL cr, KILL oldval); 7311 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7312 "MOV $res,0\n\t" 7313 "JNE,s fail\n\t" 7314 "MOV $res,1\n" 7315 "fail:" %} 7316 ins_encode( enc_cmpxchgb(mem_ptr), 7317 enc_flags_ne_to_boolean(res) ); 7318 ins_pipe( pipe_cmpxchg ); 7319 %} 7320 7321 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7322 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval))); 7323 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval))); 7324 effect(KILL cr, KILL oldval); 7325 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7326 "MOV $res,0\n\t" 7327 "JNE,s fail\n\t" 7328 "MOV $res,1\n" 7329 "fail:" %} 7330 ins_encode( enc_cmpxchgw(mem_ptr), 7331 enc_flags_ne_to_boolean(res) ); 7332 ins_pipe( pipe_cmpxchg ); 7333 %} 7334 7335 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7336 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 7337 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval))); 7338 effect(KILL cr, KILL oldval); 7339 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7340 "MOV $res,0\n\t" 7341 "JNE,s fail\n\t" 7342 "MOV $res,1\n" 7343 "fail:" %} 7344 ins_encode( 
enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7345 ins_pipe( pipe_cmpxchg ); 7346 %} 7347 7348 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7349 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval))); 7350 effect(KILL cr); 7351 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7352 ins_encode( enc_cmpxchg8(mem_ptr) ); 7353 ins_pipe( pipe_cmpxchg ); 7354 %} 7355 7356 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7357 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); 7358 effect(KILL cr); 7359 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7360 ins_encode( enc_cmpxchg(mem_ptr) ); 7361 ins_pipe( pipe_cmpxchg ); 7362 %} 7363 7364 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7365 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval))); 7366 effect(KILL cr); 7367 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7368 ins_encode( enc_cmpxchgb(mem_ptr) ); 7369 ins_pipe( pipe_cmpxchg ); 7370 %} 7371 7372 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7373 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval))); 7374 effect(KILL cr); 7375 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7376 ins_encode( enc_cmpxchgw(mem_ptr) ); 7377 ins_pipe( pipe_cmpxchg ); 7378 %} 7379 7380 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7381 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval))); 7382 effect(KILL cr); 7383 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7384 ins_encode( 
enc_cmpxchg(mem_ptr) ); 7385 ins_pipe( pipe_cmpxchg ); 7386 %} 7387 7388 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7389 predicate(n->as_LoadStore()->result_not_used()); 7390 match(Set dummy (GetAndAddB mem add)); 7391 effect(KILL cr); 7392 format %{ "ADDB [$mem],$add" %} 7393 ins_encode %{ 7394 __ lock(); 7395 __ addb($mem$$Address, $add$$constant); 7396 %} 7397 ins_pipe( pipe_cmpxchg ); 7398 %} 7399 7400 // Important to match to xRegI: only 8-bit regs. 7401 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{ 7402 match(Set newval (GetAndAddB mem newval)); 7403 effect(KILL cr); 7404 format %{ "XADDB [$mem],$newval" %} 7405 ins_encode %{ 7406 __ lock(); 7407 __ xaddb($mem$$Address, $newval$$Register); 7408 %} 7409 ins_pipe( pipe_cmpxchg ); 7410 %} 7411 7412 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7413 predicate(n->as_LoadStore()->result_not_used()); 7414 match(Set dummy (GetAndAddS mem add)); 7415 effect(KILL cr); 7416 format %{ "ADDS [$mem],$add" %} 7417 ins_encode %{ 7418 __ lock(); 7419 __ addw($mem$$Address, $add$$constant); 7420 %} 7421 ins_pipe( pipe_cmpxchg ); 7422 %} 7423 7424 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{ 7425 match(Set newval (GetAndAddS mem newval)); 7426 effect(KILL cr); 7427 format %{ "XADDS [$mem],$newval" %} 7428 ins_encode %{ 7429 __ lock(); 7430 __ xaddw($mem$$Address, $newval$$Register); 7431 %} 7432 ins_pipe( pipe_cmpxchg ); 7433 %} 7434 7435 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7436 predicate(n->as_LoadStore()->result_not_used()); 7437 match(Set dummy (GetAndAddI mem add)); 7438 effect(KILL cr); 7439 format %{ "ADDL [$mem],$add" %} 7440 ins_encode %{ 7441 __ lock(); 7442 __ addl($mem$$Address, $add$$constant); 7443 %} 7444 ins_pipe( pipe_cmpxchg ); 7445 %} 7446 7447 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ 7448 match(Set newval (GetAndAddI mem newval)); 7449 effect(KILL cr); 7450 
format %{ "XADDL [$mem],$newval" %} 7451 ins_encode %{ 7452 __ lock(); 7453 __ xaddl($mem$$Address, $newval$$Register); 7454 %} 7455 ins_pipe( pipe_cmpxchg ); 7456 %} 7457 7458 // Important to match to xRegI: only 8-bit regs. 7459 instruct xchgB( memory mem, xRegI newval) %{ 7460 match(Set newval (GetAndSetB mem newval)); 7461 format %{ "XCHGB $newval,[$mem]" %} 7462 ins_encode %{ 7463 __ xchgb($newval$$Register, $mem$$Address); 7464 %} 7465 ins_pipe( pipe_cmpxchg ); 7466 %} 7467 7468 instruct xchgS( memory mem, rRegI newval) %{ 7469 match(Set newval (GetAndSetS mem newval)); 7470 format %{ "XCHGW $newval,[$mem]" %} 7471 ins_encode %{ 7472 __ xchgw($newval$$Register, $mem$$Address); 7473 %} 7474 ins_pipe( pipe_cmpxchg ); 7475 %} 7476 7477 instruct xchgI( memory mem, rRegI newval) %{ 7478 match(Set newval (GetAndSetI mem newval)); 7479 format %{ "XCHGL $newval,[$mem]" %} 7480 ins_encode %{ 7481 __ xchgl($newval$$Register, $mem$$Address); 7482 %} 7483 ins_pipe( pipe_cmpxchg ); 7484 %} 7485 7486 instruct xchgP( memory mem, pRegP newval) %{ 7487 match(Set newval (GetAndSetP mem newval)); 7488 format %{ "XCHGL $newval,[$mem]" %} 7489 ins_encode %{ 7490 __ xchgl($newval$$Register, $mem$$Address); 7491 %} 7492 ins_pipe( pipe_cmpxchg ); 7493 %} 7494 7495 //----------Subtraction Instructions------------------------------------------- 7496 7497 // Integer Subtraction Instructions 7498 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7499 match(Set dst (SubI dst src)); 7500 effect(KILL cr); 7501 7502 size(2); 7503 format %{ "SUB $dst,$src" %} 7504 opcode(0x2B); 7505 ins_encode( OpcP, RegReg( dst, src) ); 7506 ins_pipe( ialu_reg_reg ); 7507 %} 7508 7509 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7510 match(Set dst (SubI dst src)); 7511 effect(KILL cr); 7512 7513 format %{ "SUB $dst,$src" %} 7514 opcode(0x81,0x05); /* Opcode 81 /5 */ 7515 // ins_encode( RegImm( dst, src) ); 7516 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7517 ins_pipe( 
ialu_reg ); 7518 %} 7519 7520 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7521 match(Set dst (SubI dst (LoadI src))); 7522 effect(KILL cr); 7523 7524 ins_cost(150); 7525 format %{ "SUB $dst,$src" %} 7526 opcode(0x2B); 7527 ins_encode( OpcP, RegMem( dst, src) ); 7528 ins_pipe( ialu_reg_mem ); 7529 %} 7530 7531 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7532 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 7533 effect(KILL cr); 7534 7535 ins_cost(150); 7536 format %{ "SUB $dst,$src" %} 7537 opcode(0x29); /* Opcode 29 /r */ 7538 ins_encode( OpcP, RegMem( src, dst ) ); 7539 ins_pipe( ialu_mem_reg ); 7540 %} 7541 7542 // Subtract from a pointer 7543 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{ 7544 match(Set dst (AddP dst (SubI zero src))); 7545 effect(KILL cr); 7546 7547 size(2); 7548 format %{ "SUB $dst,$src" %} 7549 opcode(0x2B); 7550 ins_encode( OpcP, RegReg( dst, src) ); 7551 ins_pipe( ialu_reg_reg ); 7552 %} 7553 7554 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{ 7555 match(Set dst (SubI zero dst)); 7556 effect(KILL cr); 7557 7558 size(2); 7559 format %{ "NEG $dst" %} 7560 opcode(0xF7,0x03); // Opcode F7 /3 7561 ins_encode( OpcP, RegOpc( dst ) ); 7562 ins_pipe( ialu_reg ); 7563 %} 7564 7565 //----------Multiplication/Division Instructions------------------------------- 7566 // Integer Multiplication Instructions 7567 // Multiply Register 7568 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7569 match(Set dst (MulI dst src)); 7570 effect(KILL cr); 7571 7572 size(3); 7573 ins_cost(300); 7574 format %{ "IMUL $dst,$src" %} 7575 opcode(0xAF, 0x0F); 7576 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 7577 ins_pipe( ialu_reg_reg_alu0 ); 7578 %} 7579 7580 // Multiply 32-bit Immediate 7581 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ 7582 match(Set dst (MulI src imm)); 7583 effect(KILL cr); 7584 7585 ins_cost(300); 7586 format %{ "IMUL $dst,$src,$imm" %} 
7587 opcode(0x69); /* 69 /r id */ 7588 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 7589 ins_pipe( ialu_reg_reg_alu0 ); 7590 %} 7591 7592 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 7593 match(Set dst src); 7594 effect(KILL cr); 7595 7596 // Note that this is artificially increased to make it more expensive than loadConL 7597 ins_cost(250); 7598 format %{ "MOV EAX,$src\t// low word only" %} 7599 opcode(0xB8); 7600 ins_encode( LdImmL_Lo(dst, src) ); 7601 ins_pipe( ialu_reg_fat ); 7602 %} 7603 7604 // Multiply by 32-bit Immediate, taking the shifted high order results 7605 // (special case for shift by 32) 7606 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 7607 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7608 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7609 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7610 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7611 effect(USE src1, KILL cr); 7612 7613 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7614 ins_cost(0*100 + 1*400 - 150); 7615 format %{ "IMUL EDX:EAX,$src1" %} 7616 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7617 ins_pipe( pipe_slow ); 7618 %} 7619 7620 // Multiply by 32-bit Immediate, taking the shifted high order results 7621 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 7622 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7623 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7624 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7625 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7626 effect(USE 
src1, KILL cr); 7627 7628 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7629 ins_cost(1*100 + 1*400 - 150); 7630 format %{ "IMUL EDX:EAX,$src1\n\t" 7631 "SAR EDX,$cnt-32" %} 7632 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7633 ins_pipe( pipe_slow ); 7634 %} 7635 7636 // Multiply Memory 32-bit Immediate 7637 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ 7638 match(Set dst (MulI (LoadI src) imm)); 7639 effect(KILL cr); 7640 7641 ins_cost(300); 7642 format %{ "IMUL $dst,$src,$imm" %} 7643 opcode(0x69); /* 69 /r id */ 7644 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); 7645 ins_pipe( ialu_reg_mem_alu0 ); 7646 %} 7647 7648 // Multiply Memory 7649 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ 7650 match(Set dst (MulI dst (LoadI src))); 7651 effect(KILL cr); 7652 7653 ins_cost(350); 7654 format %{ "IMUL $dst,$src" %} 7655 opcode(0xAF, 0x0F); 7656 ins_encode( OpcS, OpcP, RegMem( dst, src) ); 7657 ins_pipe( ialu_reg_mem_alu0 ); 7658 %} 7659 7660 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr) 7661 %{ 7662 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3))); 7663 effect(KILL cr, KILL src2); 7664 7665 expand %{ mulI_eReg(dst, src1, cr); 7666 mulI_eReg(src2, src3, cr); 7667 addI_eReg(dst, src2, cr); %} 7668 %} 7669 7670 // Multiply Register Int to Long 7671 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 7672 // Basic Idea: long = (long)int * (long)int 7673 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 7674 effect(DEF dst, USE src, USE src1, KILL flags); 7675 7676 ins_cost(300); 7677 format %{ "IMUL $dst,$src1" %} 7678 7679 ins_encode( long_int_multiply( dst, src1 ) ); 7680 ins_pipe( ialu_reg_reg_alu0 ); 7681 %} 7682 7683 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 7684 // Basic Idea: long = (int & 0xffffffffL) * 
(int & 0xffffffffL) 7685 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 7686 effect(KILL flags); 7687 7688 ins_cost(300); 7689 format %{ "MUL $dst,$src1" %} 7690 7691 ins_encode( long_uint_multiply(dst, src1) ); 7692 ins_pipe( ialu_reg_reg_alu0 ); 7693 %} 7694 7695 // Multiply Register Long 7696 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7697 match(Set dst (MulL dst src)); 7698 effect(KILL cr, TEMP tmp); 7699 ins_cost(4*100+3*400); 7700 // Basic idea: lo(result) = lo(x_lo * y_lo) 7701 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 7702 format %{ "MOV $tmp,$src.lo\n\t" 7703 "IMUL $tmp,EDX\n\t" 7704 "MOV EDX,$src.hi\n\t" 7705 "IMUL EDX,EAX\n\t" 7706 "ADD $tmp,EDX\n\t" 7707 "MUL EDX:EAX,$src.lo\n\t" 7708 "ADD EDX,$tmp" %} 7709 ins_encode( long_multiply( dst, src, tmp ) ); 7710 ins_pipe( pipe_slow ); 7711 %} 7712 7713 // Multiply Register Long where the left operand's high 32 bits are zero 7714 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7715 predicate(is_operand_hi32_zero(n->in(1))); 7716 match(Set dst (MulL dst src)); 7717 effect(KILL cr, TEMP tmp); 7718 ins_cost(2*100+2*400); 7719 // Basic idea: lo(result) = lo(x_lo * y_lo) 7720 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 7721 format %{ "MOV $tmp,$src.hi\n\t" 7722 "IMUL $tmp,EAX\n\t" 7723 "MUL EDX:EAX,$src.lo\n\t" 7724 "ADD EDX,$tmp" %} 7725 ins_encode %{ 7726 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 7727 __ imull($tmp$$Register, rax); 7728 __ mull($src$$Register); 7729 __ addl(rdx, $tmp$$Register); 7730 %} 7731 ins_pipe( pipe_slow ); 7732 %} 7733 7734 // Multiply Register Long where the right operand's high 32 bits are zero 7735 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7736 predicate(is_operand_hi32_zero(n->in(2))); 7737 match(Set dst (MulL dst src)); 7738 effect(KILL cr, TEMP tmp); 7739 
ins_cost(2*100+2*400); 7740 // Basic idea: lo(result) = lo(x_lo * y_lo) 7741 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 7742 format %{ "MOV $tmp,$src.lo\n\t" 7743 "IMUL $tmp,EDX\n\t" 7744 "MUL EDX:EAX,$src.lo\n\t" 7745 "ADD EDX,$tmp" %} 7746 ins_encode %{ 7747 __ movl($tmp$$Register, $src$$Register); 7748 __ imull($tmp$$Register, rdx); 7749 __ mull($src$$Register); 7750 __ addl(rdx, $tmp$$Register); 7751 %} 7752 ins_pipe( pipe_slow ); 7753 %} 7754 7755 // Multiply Register Long where the left and the right operands' high 32 bits are zero 7756 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ 7757 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); 7758 match(Set dst (MulL dst src)); 7759 effect(KILL cr); 7760 ins_cost(1*400); 7761 // Basic idea: lo(result) = lo(x_lo * y_lo) 7762 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 7763 format %{ "MUL EDX:EAX,$src.lo\n\t" %} 7764 ins_encode %{ 7765 __ mull($src$$Register); 7766 %} 7767 ins_pipe( pipe_slow ); 7768 %} 7769 7770 // Multiply Register Long by small constant 7771 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ 7772 match(Set dst (MulL dst src)); 7773 effect(KILL cr, TEMP tmp); 7774 ins_cost(2*100+2*400); 7775 size(12); 7776 // Basic idea: lo(result) = lo(src * EAX) 7777 // hi(result) = hi(src * EAX) + lo(src * EDX) 7778 format %{ "IMUL $tmp,EDX,$src\n\t" 7779 "MOV EDX,$src\n\t" 7780 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 7781 "ADD EDX,$tmp" %} 7782 ins_encode( long_multiply_con( dst, src, tmp ) ); 7783 ins_pipe( pipe_slow ); 7784 %} 7785 7786 // Integer DIV with Register 7787 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7788 match(Set rax (DivI rax div)); 7789 effect(KILL rdx, KILL cr); 7790 size(26); 7791 ins_cost(30*100+10*100); 7792 format %{ "CMP EAX,0x80000000\n\t" 7793 "JNE,s normal\n\t" 7794 
"XOR EDX,EDX\n\t" 7795 "CMP ECX,-1\n\t" 7796 "JE,s done\n" 7797 "normal: CDQ\n\t" 7798 "IDIV $div\n\t" 7799 "done:" %} 7800 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7801 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7802 ins_pipe( ialu_reg_reg_alu0 ); 7803 %} 7804 7805 // Divide Register Long 7806 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ 7807 match(Set dst (DivL src1 src2)); 7808 effect(CALL); 7809 ins_cost(10000); 7810 format %{ "PUSH $src1.hi\n\t" 7811 "PUSH $src1.lo\n\t" 7812 "PUSH $src2.hi\n\t" 7813 "PUSH $src2.lo\n\t" 7814 "CALL SharedRuntime::ldiv\n\t" 7815 "ADD ESP,16" %} 7816 ins_encode( long_div(src1,src2) ); 7817 ins_pipe( pipe_slow ); 7818 %} 7819 7820 // Integer DIVMOD with Register, both quotient and mod results 7821 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7822 match(DivModI rax div); 7823 effect(KILL cr); 7824 size(26); 7825 ins_cost(30*100+10*100); 7826 format %{ "CMP EAX,0x80000000\n\t" 7827 "JNE,s normal\n\t" 7828 "XOR EDX,EDX\n\t" 7829 "CMP ECX,-1\n\t" 7830 "JE,s done\n" 7831 "normal: CDQ\n\t" 7832 "IDIV $div\n\t" 7833 "done:" %} 7834 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7835 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7836 ins_pipe( pipe_slow ); 7837 %} 7838 7839 // Integer MOD with Register 7840 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ 7841 match(Set rdx (ModI rax div)); 7842 effect(KILL rax, KILL cr); 7843 7844 size(26); 7845 ins_cost(300); 7846 format %{ "CDQ\n\t" 7847 "IDIV $div" %} 7848 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7849 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7850 ins_pipe( ialu_reg_reg_alu0 ); 7851 %} 7852 7853 // Remainder Register Long 7854 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ 7855 match(Set dst (ModL src1 src2)); 7856 effect(CALL); 7857 ins_cost(10000); 7858 format %{ "PUSH $src1.hi\n\t" 7859 "PUSH $src1.lo\n\t" 7860 "PUSH $src2.hi\n\t" 7861 "PUSH $src2.lo\n\t" 7862 "CALL SharedRuntime::lrem\n\t" 7863 "ADD ESP,16" 
%} 7864 ins_encode( long_mod(src1,src2) ); 7865 ins_pipe( pipe_slow ); 7866 %} 7867 7868 // Divide Register Long (no special case since divisor != -1) 7869 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7870 match(Set dst (DivL dst imm)); 7871 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7872 ins_cost(1000); 7873 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" 7874 "XOR $tmp2,$tmp2\n\t" 7875 "CMP $tmp,EDX\n\t" 7876 "JA,s fast\n\t" 7877 "MOV $tmp2,EAX\n\t" 7878 "MOV EAX,EDX\n\t" 7879 "MOV EDX,0\n\t" 7880 "JLE,s pos\n\t" 7881 "LNEG EAX : $tmp2\n\t" 7882 "DIV $tmp # unsigned division\n\t" 7883 "XCHG EAX,$tmp2\n\t" 7884 "DIV $tmp\n\t" 7885 "LNEG $tmp2 : EAX\n\t" 7886 "JMP,s done\n" 7887 "pos:\n\t" 7888 "DIV $tmp\n\t" 7889 "XCHG EAX,$tmp2\n" 7890 "fast:\n\t" 7891 "DIV $tmp\n" 7892 "done:\n\t" 7893 "MOV EDX,$tmp2\n\t" 7894 "NEG EDX:EAX # if $imm < 0" %} 7895 ins_encode %{ 7896 int con = (int)$imm$$constant; 7897 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7898 int pcon = (con > 0) ? con : -con; 7899 Label Lfast, Lpos, Ldone; 7900 7901 __ movl($tmp$$Register, pcon); 7902 __ xorl($tmp2$$Register,$tmp2$$Register); 7903 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7904 __ jccb(Assembler::above, Lfast); // result fits into 32 bit 7905 7906 __ movl($tmp2$$Register, $dst$$Register); // save 7907 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7908 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7909 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7910 7911 // Negative dividend. 
7912 // convert value to positive to use unsigned division 7913 __ lneg($dst$$Register, $tmp2$$Register); 7914 __ divl($tmp$$Register); 7915 __ xchgl($dst$$Register, $tmp2$$Register); 7916 __ divl($tmp$$Register); 7917 // revert result back to negative 7918 __ lneg($tmp2$$Register, $dst$$Register); 7919 __ jmpb(Ldone); 7920 7921 __ bind(Lpos); 7922 __ divl($tmp$$Register); // Use unsigned division 7923 __ xchgl($dst$$Register, $tmp2$$Register); 7924 // Fallthrough for final divide, tmp2 has 32 bit hi result 7925 7926 __ bind(Lfast); 7927 // fast path: src is positive 7928 __ divl($tmp$$Register); // Use unsigned division 7929 7930 __ bind(Ldone); 7931 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7932 if (con < 0) { 7933 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7934 } 7935 %} 7936 ins_pipe( pipe_slow ); 7937 %} 7938 7939 // Remainder Register Long (remainder fit into 32 bits) 7940 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7941 match(Set dst (ModL dst imm)); 7942 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7943 ins_cost(1000); 7944 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7945 "CMP $tmp,EDX\n\t" 7946 "JA,s fast\n\t" 7947 "MOV $tmp2,EAX\n\t" 7948 "MOV EAX,EDX\n\t" 7949 "MOV EDX,0\n\t" 7950 "JLE,s pos\n\t" 7951 "LNEG EAX : $tmp2\n\t" 7952 "DIV $tmp # unsigned division\n\t" 7953 "MOV EAX,$tmp2\n\t" 7954 "DIV $tmp\n\t" 7955 "NEG EDX\n\t" 7956 "JMP,s done\n" 7957 "pos:\n\t" 7958 "DIV $tmp\n\t" 7959 "MOV EAX,$tmp2\n" 7960 "fast:\n\t" 7961 "DIV $tmp\n" 7962 "done:\n\t" 7963 "MOV EAX,EDX\n\t" 7964 "SAR EDX,31\n\t" %} 7965 ins_encode %{ 7966 int con = (int)$imm$$constant; 7967 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7968 int pcon = (con > 0) ?
con : -con; 7969 Label Lfast, Lpos, Ldone; 7970 7971 __ movl($tmp$$Register, pcon); 7972 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7973 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7974 7975 __ movl($tmp2$$Register, $dst$$Register); // save 7976 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7977 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7978 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7979 7980 // Negative dividend. 7981 // convert value to positive to use unsigned division 7982 __ lneg($dst$$Register, $tmp2$$Register); 7983 __ divl($tmp$$Register); 7984 __ movl($dst$$Register, $tmp2$$Register); 7985 __ divl($tmp$$Register); 7986 // revert remainder back to negative 7987 __ negl(HIGH_FROM_LOW($dst$$Register)); 7988 __ jmpb(Ldone); 7989 7990 __ bind(Lpos); 7991 __ divl($tmp$$Register); 7992 __ movl($dst$$Register, $tmp2$$Register); 7993 7994 __ bind(Lfast); 7995 // fast path: src is positive 7996 __ divl($tmp$$Register); 7997 7998 __ bind(Ldone); 7999 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 8000 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 8001 8002 %} 8003 ins_pipe( pipe_slow ); 8004 %} 8005 8006 // Integer Shift Instructions 8007 // Shift Left by one 8008 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8009 match(Set dst (LShiftI dst shift)); 8010 effect(KILL cr); 8011 8012 size(2); 8013 format %{ "SHL $dst,$shift" %} 8014 opcode(0xD1, 0x4); /* D1 /4 */ 8015 ins_encode( OpcP, RegOpc( dst ) ); 8016 ins_pipe( ialu_reg ); 8017 %} 8018 8019 // Shift Left by 8-bit immediate 8020 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8021 match(Set dst (LShiftI dst shift)); 8022 effect(KILL cr); 8023 8024 size(3); 8025 format %{ "SHL $dst,$shift" %} 8026 opcode(0xC1, 0x4); /* C1 /4 ib */ 8027 ins_encode( RegOpcImm( dst, shift) ); 8028 ins_pipe( ialu_reg ); 8029 %} 8030 8031 // Shift Left by variable 8032 instruct 
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8033 match(Set dst (LShiftI dst shift)); 8034 effect(KILL cr); 8035 8036 size(2); 8037 format %{ "SHL $dst,$shift" %} 8038 opcode(0xD3, 0x4); /* D3 /4 */ 8039 ins_encode( OpcP, RegOpc( dst ) ); 8040 ins_pipe( ialu_reg_reg ); 8041 %} 8042 8043 // Arithmetic shift right by one 8044 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8045 match(Set dst (RShiftI dst shift)); 8046 effect(KILL cr); 8047 8048 size(2); 8049 format %{ "SAR $dst,$shift" %} 8050 opcode(0xD1, 0x7); /* D1 /7 */ 8051 ins_encode( OpcP, RegOpc( dst ) ); 8052 ins_pipe( ialu_reg ); 8053 %} 8054 8055 // Arithmetic shift right by one 8056 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{ 8057 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 8058 effect(KILL cr); 8059 format %{ "SAR $dst,$shift" %} 8060 opcode(0xD1, 0x7); /* D1 /7 */ 8061 ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 8062 ins_pipe( ialu_mem_imm ); 8063 %} 8064 8065 // Arithmetic Shift Right by 8-bit immediate 8066 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8067 match(Set dst (RShiftI dst shift)); 8068 effect(KILL cr); 8069 8070 size(3); 8071 format %{ "SAR $dst,$shift" %} 8072 opcode(0xC1, 0x7); /* C1 /7 ib */ 8073 ins_encode( RegOpcImm( dst, shift ) ); 8074 ins_pipe( ialu_mem_imm ); 8075 %} 8076 8077 // Arithmetic Shift Right by 8-bit immediate 8078 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 8079 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 8080 effect(KILL cr); 8081 8082 format %{ "SAR $dst,$shift" %} 8083 opcode(0xC1, 0x7); /* C1 /7 ib */ 8084 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 8085 ins_pipe( ialu_mem_imm ); 8086 %} 8087 8088 // Arithmetic Shift Right by variable 8089 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8090 match(Set dst (RShiftI dst shift)); 8091 effect(KILL cr); 8092 8093 size(2); 8094 format %{ "SAR $dst,$shift" %} 8095 
opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
// Matched here as a single sign-extending byte move (MOVSX).
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
8140 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{ 8141 match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); 8142 8143 size(3); 8144 format %{ "MOVSX $dst,$src :16" %} 8145 ins_encode %{ 8146 __ movswl($dst$$Register, $src$$Register); 8147 %} 8148 ins_pipe(ialu_reg_reg); 8149 %} 8150 8151 8152 // Logical Shift Right by variable 8153 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8154 match(Set dst (URShiftI dst shift)); 8155 effect(KILL cr); 8156 8157 size(2); 8158 format %{ "SHR $dst,$shift" %} 8159 opcode(0xD3, 0x5); /* D3 /5 */ 8160 ins_encode( OpcP, RegOpc( dst ) ); 8161 ins_pipe( ialu_reg_reg ); 8162 %} 8163 8164 8165 //----------Logical Instructions----------------------------------------------- 8166 //----------Integer Logical Instructions--------------------------------------- 8167 // And Instructions 8168 // And Register with Register 8169 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8170 match(Set dst (AndI dst src)); 8171 effect(KILL cr); 8172 8173 size(2); 8174 format %{ "AND $dst,$src" %} 8175 opcode(0x23); 8176 ins_encode( OpcP, RegReg( dst, src) ); 8177 ins_pipe( ialu_reg_reg ); 8178 %} 8179 8180 // And Register with Immediate 8181 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8182 match(Set dst (AndI dst src)); 8183 effect(KILL cr); 8184 8185 format %{ "AND $dst,$src" %} 8186 opcode(0x81,0x04); /* Opcode 81 /4 */ 8187 // ins_encode( RegImm( dst, src) ); 8188 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8189 ins_pipe( ialu_reg ); 8190 %} 8191 8192 // And Register with Memory 8193 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8194 match(Set dst (AndI dst (LoadI src))); 8195 effect(KILL cr); 8196 8197 ins_cost(150); 8198 format %{ "AND $dst,$src" %} 8199 opcode(0x23); 8200 ins_encode( OpcP, RegMem( dst, src) ); 8201 ins_pipe( ialu_reg_mem ); 8202 %} 8203 8204 // And Memory with Register 8205 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8206 match(Set dst 
(StoreI dst (AndI (LoadI dst) src))); 8207 effect(KILL cr); 8208 8209 ins_cost(150); 8210 format %{ "AND $dst,$src" %} 8211 opcode(0x21); /* Opcode 21 /r */ 8212 ins_encode( OpcP, RegMem( src, dst ) ); 8213 ins_pipe( ialu_mem_reg ); 8214 %} 8215 8216 // And Memory with Immediate 8217 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8218 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8219 effect(KILL cr); 8220 8221 ins_cost(125); 8222 format %{ "AND $dst,$src" %} 8223 opcode(0x81, 0x4); /* Opcode 81 /4 id */ 8224 // ins_encode( MemImm( dst, src) ); 8225 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8226 ins_pipe( ialu_mem_imm ); 8227 %} 8228 8229 // BMI1 instructions 8230 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ 8231 match(Set dst (AndI (XorI src1 minus_1) src2)); 8232 predicate(UseBMI1Instructions); 8233 effect(KILL cr); 8234 8235 format %{ "ANDNL $dst, $src1, $src2" %} 8236 8237 ins_encode %{ 8238 __ andnl($dst$$Register, $src1$$Register, $src2$$Register); 8239 %} 8240 ins_pipe(ialu_reg); 8241 %} 8242 8243 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ 8244 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) )); 8245 predicate(UseBMI1Instructions); 8246 effect(KILL cr); 8247 8248 ins_cost(125); 8249 format %{ "ANDNL $dst, $src1, $src2" %} 8250 8251 ins_encode %{ 8252 __ andnl($dst$$Register, $src1$$Register, $src2$$Address); 8253 %} 8254 ins_pipe(ialu_reg_mem); 8255 %} 8256 8257 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{ 8258 match(Set dst (AndI (SubI imm_zero src) src)); 8259 predicate(UseBMI1Instructions); 8260 effect(KILL cr); 8261 8262 format %{ "BLSIL $dst, $src" %} 8263 8264 ins_encode %{ 8265 __ blsil($dst$$Register, $src$$Register); 8266 %} 8267 ins_pipe(ialu_reg); 8268 %} 8269 8270 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{ 8271 
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); 8272 predicate(UseBMI1Instructions); 8273 effect(KILL cr); 8274 8275 ins_cost(125); 8276 format %{ "BLSIL $dst, $src" %} 8277 8278 ins_encode %{ 8279 __ blsil($dst$$Register, $src$$Address); 8280 %} 8281 ins_pipe(ialu_reg_mem); 8282 %} 8283 8284 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8285 %{ 8286 match(Set dst (XorI (AddI src minus_1) src)); 8287 predicate(UseBMI1Instructions); 8288 effect(KILL cr); 8289 8290 format %{ "BLSMSKL $dst, $src" %} 8291 8292 ins_encode %{ 8293 __ blsmskl($dst$$Register, $src$$Register); 8294 %} 8295 8296 ins_pipe(ialu_reg); 8297 %} 8298 8299 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8300 %{ 8301 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); 8302 predicate(UseBMI1Instructions); 8303 effect(KILL cr); 8304 8305 ins_cost(125); 8306 format %{ "BLSMSKL $dst, $src" %} 8307 8308 ins_encode %{ 8309 __ blsmskl($dst$$Register, $src$$Address); 8310 %} 8311 8312 ins_pipe(ialu_reg_mem); 8313 %} 8314 8315 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8316 %{ 8317 match(Set dst (AndI (AddI src minus_1) src) ); 8318 predicate(UseBMI1Instructions); 8319 effect(KILL cr); 8320 8321 format %{ "BLSRL $dst, $src" %} 8322 8323 ins_encode %{ 8324 __ blsrl($dst$$Register, $src$$Register); 8325 %} 8326 8327 ins_pipe(ialu_reg); 8328 %} 8329 8330 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8331 %{ 8332 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); 8333 predicate(UseBMI1Instructions); 8334 effect(KILL cr); 8335 8336 ins_cost(125); 8337 format %{ "BLSRL $dst, $src" %} 8338 8339 ins_encode %{ 8340 __ blsrl($dst$$Register, $src$$Address); 8341 %} 8342 8343 ins_pipe(ialu_reg_mem); 8344 %} 8345 8346 // Or Instructions 8347 // Or Register with Register 8348 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8349 match(Set dst 
(OrI dst src)); 8350 effect(KILL cr); 8351 8352 size(2); 8353 format %{ "OR $dst,$src" %} 8354 opcode(0x0B); 8355 ins_encode( OpcP, RegReg( dst, src) ); 8356 ins_pipe( ialu_reg_reg ); 8357 %} 8358 8359 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8360 match(Set dst (OrI dst (CastP2X src))); 8361 effect(KILL cr); 8362 8363 size(2); 8364 format %{ "OR $dst,$src" %} 8365 opcode(0x0B); 8366 ins_encode( OpcP, RegReg( dst, src) ); 8367 ins_pipe( ialu_reg_reg ); 8368 %} 8369 8370 8371 // Or Register with Immediate 8372 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8373 match(Set dst (OrI dst src)); 8374 effect(KILL cr); 8375 8376 format %{ "OR $dst,$src" %} 8377 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8378 // ins_encode( RegImm( dst, src) ); 8379 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8380 ins_pipe( ialu_reg ); 8381 %} 8382 8383 // Or Register with Memory 8384 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8385 match(Set dst (OrI dst (LoadI src))); 8386 effect(KILL cr); 8387 8388 ins_cost(150); 8389 format %{ "OR $dst,$src" %} 8390 opcode(0x0B); 8391 ins_encode( OpcP, RegMem( dst, src) ); 8392 ins_pipe( ialu_reg_mem ); 8393 %} 8394 8395 // Or Memory with Register 8396 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8397 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8398 effect(KILL cr); 8399 8400 ins_cost(150); 8401 format %{ "OR $dst,$src" %} 8402 opcode(0x09); /* Opcode 09 /r */ 8403 ins_encode( OpcP, RegMem( src, dst ) ); 8404 ins_pipe( ialu_mem_reg ); 8405 %} 8406 8407 // Or Memory with Immediate 8408 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8409 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8410 effect(KILL cr); 8411 8412 ins_cost(125); 8413 format %{ "OR $dst,$src" %} 8414 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8415 // ins_encode( MemImm( dst, src) ); 8416 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8417 ins_pipe( ialu_mem_imm ); 
8418 %} 8419 8420 // ROL/ROR 8421 // ROL expand 8422 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8423 effect(USE_DEF dst, USE shift, KILL cr); 8424 8425 format %{ "ROL $dst, $shift" %} 8426 opcode(0xD1, 0x0); /* Opcode D1 /0 */ 8427 ins_encode( OpcP, RegOpc( dst )); 8428 ins_pipe( ialu_reg ); 8429 %} 8430 8431 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8432 effect(USE_DEF dst, USE shift, KILL cr); 8433 8434 format %{ "ROL $dst, $shift" %} 8435 opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 8436 ins_encode( RegOpcImm(dst, shift) ); 8437 ins_pipe(ialu_reg); 8438 %} 8439 8440 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8441 effect(USE_DEF dst, USE shift, KILL cr); 8442 8443 format %{ "ROL $dst, $shift" %} 8444 opcode(0xD3, 0x0); /* Opcode D3 /0 */ 8445 ins_encode(OpcP, RegOpc(dst)); 8446 ins_pipe( ialu_reg_reg ); 8447 %} 8448 // end of ROL expand 8449 8450 // ROL 32bit by one once 8451 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 8452 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8453 8454 expand %{ 8455 rolI_eReg_imm1(dst, lshift, cr); 8456 %} 8457 %} 8458 8459 // ROL 32bit var by imm8 once 8460 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 8461 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8462 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8463 8464 expand %{ 8465 rolI_eReg_imm8(dst, lshift, cr); 8466 %} 8467 %} 8468 8469 // ROL 32bit var by var once 8470 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ 8471 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 8472 8473 expand %{ 8474 rolI_eReg_CL(dst, shift, cr); 8475 %} 8476 %} 8477 8478 // ROL 32bit var by var once 8479 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8480 match(Set dst ( OrI (LShiftI dst shift) 
(URShiftI dst (SubI c32 shift)))); 8481 8482 expand %{ 8483 rolI_eReg_CL(dst, shift, cr); 8484 %} 8485 %} 8486 8487 // ROR expand 8488 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8489 effect(USE_DEF dst, USE shift, KILL cr); 8490 8491 format %{ "ROR $dst, $shift" %} 8492 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8493 ins_encode( OpcP, RegOpc( dst ) ); 8494 ins_pipe( ialu_reg ); 8495 %} 8496 8497 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8498 effect (USE_DEF dst, USE shift, KILL cr); 8499 8500 format %{ "ROR $dst, $shift" %} 8501 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8502 ins_encode( RegOpcImm(dst, shift) ); 8503 ins_pipe( ialu_reg ); 8504 %} 8505 8506 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8507 effect(USE_DEF dst, USE shift, KILL cr); 8508 8509 format %{ "ROR $dst, $shift" %} 8510 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8511 ins_encode(OpcP, RegOpc(dst)); 8512 ins_pipe( ialu_reg_reg ); 8513 %} 8514 // end of ROR expand 8515 8516 // ROR right once 8517 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8518 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8519 8520 expand %{ 8521 rorI_eReg_imm1(dst, rshift, cr); 8522 %} 8523 %} 8524 8525 // ROR 32bit by immI8 once 8526 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8527 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8528 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8529 8530 expand %{ 8531 rorI_eReg_imm8(dst, rshift, cr); 8532 %} 8533 %} 8534 8535 // ROR 32bit var by var once 8536 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ 8537 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8538 8539 expand %{ 8540 rorI_eReg_CL(dst, shift, cr); 8541 %} 8542 %} 8543 8544 // ROR 32bit var by var once 8545 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, 
immI_32 c32, eFlagsReg cr) %{ 8546 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8547 8548 expand %{ 8549 rorI_eReg_CL(dst, shift, cr); 8550 %} 8551 %} 8552 8553 // Xor Instructions 8554 // Xor Register with Register 8555 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8556 match(Set dst (XorI dst src)); 8557 effect(KILL cr); 8558 8559 size(2); 8560 format %{ "XOR $dst,$src" %} 8561 opcode(0x33); 8562 ins_encode( OpcP, RegReg( dst, src) ); 8563 ins_pipe( ialu_reg_reg ); 8564 %} 8565 8566 // Xor Register with Immediate -1 8567 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8568 match(Set dst (XorI dst imm)); 8569 8570 size(2); 8571 format %{ "NOT $dst" %} 8572 ins_encode %{ 8573 __ notl($dst$$Register); 8574 %} 8575 ins_pipe( ialu_reg ); 8576 %} 8577 8578 // Xor Register with Immediate 8579 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8580 match(Set dst (XorI dst src)); 8581 effect(KILL cr); 8582 8583 format %{ "XOR $dst,$src" %} 8584 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8585 // ins_encode( RegImm( dst, src) ); 8586 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8587 ins_pipe( ialu_reg ); 8588 %} 8589 8590 // Xor Register with Memory 8591 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8592 match(Set dst (XorI dst (LoadI src))); 8593 effect(KILL cr); 8594 8595 ins_cost(150); 8596 format %{ "XOR $dst,$src" %} 8597 opcode(0x33); 8598 ins_encode( OpcP, RegMem(dst, src) ); 8599 ins_pipe( ialu_reg_mem ); 8600 %} 8601 8602 // Xor Memory with Register 8603 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8604 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8605 effect(KILL cr); 8606 8607 ins_cost(150); 8608 format %{ "XOR $dst,$src" %} 8609 opcode(0x31); /* Opcode 31 /r */ 8610 ins_encode( OpcP, RegMem( src, dst ) ); 8611 ins_pipe( ialu_mem_reg ); 8612 %} 8613 8614 // Xor Memory with Immediate 8615 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8616 match(Set 
dst (StoreI dst (XorI (LoadI dst) src))); 8617 effect(KILL cr); 8618 8619 ins_cost(125); 8620 format %{ "XOR $dst,$src" %} 8621 opcode(0x81,0x6); /* Opcode 81 /6 id */ 8622 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8623 ins_pipe( ialu_mem_imm ); 8624 %} 8625 8626 //----------Convert Int to Boolean--------------------------------------------- 8627 8628 instruct movI_nocopy(rRegI dst, rRegI src) %{ 8629 effect( DEF dst, USE src ); 8630 format %{ "MOV $dst,$src" %} 8631 ins_encode( enc_Copy( dst, src) ); 8632 ins_pipe( ialu_reg_reg ); 8633 %} 8634 8635 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8636 effect( USE_DEF dst, USE src, KILL cr ); 8637 8638 size(4); 8639 format %{ "NEG $dst\n\t" 8640 "ADC $dst,$src" %} 8641 ins_encode( neg_reg(dst), 8642 OpcRegReg(0x13,dst,src) ); 8643 ins_pipe( ialu_reg_reg_long ); 8644 %} 8645 8646 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8647 match(Set dst (Conv2B src)); 8648 8649 expand %{ 8650 movI_nocopy(dst,src); 8651 ci2b(dst,src,cr); 8652 %} 8653 %} 8654 8655 instruct movP_nocopy(rRegI dst, eRegP src) %{ 8656 effect( DEF dst, USE src ); 8657 format %{ "MOV $dst,$src" %} 8658 ins_encode( enc_Copy( dst, src) ); 8659 ins_pipe( ialu_reg_reg ); 8660 %} 8661 8662 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8663 effect( USE_DEF dst, USE src, KILL cr ); 8664 format %{ "NEG $dst\n\t" 8665 "ADC $dst,$src" %} 8666 ins_encode( neg_reg(dst), 8667 OpcRegReg(0x13,dst,src) ); 8668 ins_pipe( ialu_reg_reg_long ); 8669 %} 8670 8671 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8672 match(Set dst (Conv2B src)); 8673 8674 expand %{ 8675 movP_nocopy(dst,src); 8676 cp2b(dst,src,cr); 8677 %} 8678 %} 8679 8680 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ 8681 match(Set dst (CmpLTMask p q)); 8682 effect(KILL cr); 8683 ins_cost(400); 8684 8685 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination 8686 format %{ "XOR $dst,$dst\n\t" 8687 
"CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // Produce -1 if p < q, else 0.  SETlt writes only the low byte of
    // Rd, so Rd is cleared first (before the CMP, to keep the flags);
    // NEG then turns 0/1 into 0/-1.
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// CmpLTMask against zero: just smear the sign bit across the register
// with an arithmetic shift (dst = dst < 0 ? -1 : 0).
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// Fused form: p = ((p < q) ? y : 0) + (p - q), computed as
// p -= q; if that went negative (i.e. p was < q), add y back in.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// Fused form: y = (p < q) ? y : 0 -- keep y when p < q, otherwise
// clear it, without materializing the mask.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These match the OverflowAddI/SubI/MulI ideal nodes: the arithmetic (or,
// for the Sub forms, just a CMP) is performed solely to set the condition
// codes -- the matched result is the flags register, not a value.

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2); // ADD destroys op1 (EAX)

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow-checking add with an immediate operand.
instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow-checking subtract: CMP sets the same flags as SUB would but
// leaves op1 intact, so no register is killed here.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow-checking subtract with an immediate operand.
instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow-checking negate (0 - op2), implemented with NEG; op2 is
// destroyed.
instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow-checking multiply; IMUL destroys op1 (EAX).
instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
effect(DEF cr, USE_KILL op1, USE op2); 8834 8835 format %{ "IMUL $op1, $op2\t# overflow check int" %} 8836 ins_encode %{ 8837 __ imull($op1$$Register, $op2$$Register); 8838 %} 8839 ins_pipe(ialu_reg_reg_alu0); 8840 %} 8841 8842 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) 8843 %{ 8844 match(Set cr (OverflowMulI op1 op2)); 8845 effect(DEF cr, TEMP tmp, USE op1, USE op2); 8846 8847 format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} 8848 ins_encode %{ 8849 __ imull($tmp$$Register, $op1$$Register, $op2$$constant); 8850 %} 8851 ins_pipe(ialu_reg_reg_alu0); 8852 %} 8853 8854 // Integer Absolute Instructions 8855 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr) 8856 %{ 8857 match(Set dst (AbsI src)); 8858 effect(TEMP dst, TEMP tmp, KILL cr); 8859 format %{ "movl $tmp, $src\n\t" 8860 "sarl $tmp, 31\n\t" 8861 "movl $dst, $src\n\t" 8862 "xorl $dst, $tmp\n\t" 8863 "subl $dst, $tmp\n" 8864 %} 8865 ins_encode %{ 8866 __ movl($tmp$$Register, $src$$Register); 8867 __ sarl($tmp$$Register, 31); 8868 __ movl($dst$$Register, $src$$Register); 8869 __ xorl($dst$$Register, $tmp$$Register); 8870 __ subl($dst$$Register, $tmp$$Register); 8871 %} 8872 8873 ins_pipe(ialu_reg_reg); 8874 %} 8875 8876 //----------Long Instructions------------------------------------------------ 8877 // Add Long Register with Register 8878 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8879 match(Set dst (AddL dst src)); 8880 effect(KILL cr); 8881 ins_cost(200); 8882 format %{ "ADD $dst.lo,$src.lo\n\t" 8883 "ADC $dst.hi,$src.hi" %} 8884 opcode(0x03, 0x13); 8885 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8886 ins_pipe( ialu_reg_reg_long ); 8887 %} 8888 8889 // Add Long Register with Immediate 8890 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8891 match(Set dst (AddL dst src)); 8892 effect(KILL cr); 8893 format %{ "ADD $dst.lo,$src.lo\n\t" 8894 "ADC $dst.hi,$src.hi" %} 8895 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 
*/ 8896 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8897 ins_pipe( ialu_reg_long ); 8898 %} 8899 8900 // Add Long Register with Memory 8901 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8902 match(Set dst (AddL dst (LoadL mem))); 8903 effect(KILL cr); 8904 ins_cost(125); 8905 format %{ "ADD $dst.lo,$mem\n\t" 8906 "ADC $dst.hi,$mem+4" %} 8907 opcode(0x03, 0x13); 8908 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8909 ins_pipe( ialu_reg_long_mem ); 8910 %} 8911 8912 // Subtract Long Register with Register. 8913 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8914 match(Set dst (SubL dst src)); 8915 effect(KILL cr); 8916 ins_cost(200); 8917 format %{ "SUB $dst.lo,$src.lo\n\t" 8918 "SBB $dst.hi,$src.hi" %} 8919 opcode(0x2B, 0x1B); 8920 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8921 ins_pipe( ialu_reg_reg_long ); 8922 %} 8923 8924 // Subtract Long Register with Immediate 8925 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8926 match(Set dst (SubL dst src)); 8927 effect(KILL cr); 8928 format %{ "SUB $dst.lo,$src.lo\n\t" 8929 "SBB $dst.hi,$src.hi" %} 8930 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8931 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8932 ins_pipe( ialu_reg_long ); 8933 %} 8934 8935 // Subtract Long Register with Memory 8936 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8937 match(Set dst (SubL dst (LoadL mem))); 8938 effect(KILL cr); 8939 ins_cost(125); 8940 format %{ "SUB $dst.lo,$mem\n\t" 8941 "SBB $dst.hi,$mem+4" %} 8942 opcode(0x2B, 0x1B); 8943 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8944 ins_pipe( ialu_reg_long_mem ); 8945 %} 8946 8947 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8948 match(Set dst (SubL zero dst)); 8949 effect(KILL cr); 8950 ins_cost(300); 8951 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8952 ins_encode( 
neg_long(dst) ); 8953 ins_pipe( ialu_reg_reg_long ); 8954 %} 8955 8956 // And Long Register with Register 8957 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8958 match(Set dst (AndL dst src)); 8959 effect(KILL cr); 8960 format %{ "AND $dst.lo,$src.lo\n\t" 8961 "AND $dst.hi,$src.hi" %} 8962 opcode(0x23,0x23); 8963 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8964 ins_pipe( ialu_reg_reg_long ); 8965 %} 8966 8967 // And Long Register with Immediate 8968 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8969 match(Set dst (AndL dst src)); 8970 effect(KILL cr); 8971 format %{ "AND $dst.lo,$src.lo\n\t" 8972 "AND $dst.hi,$src.hi" %} 8973 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8974 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8975 ins_pipe( ialu_reg_long ); 8976 %} 8977 8978 // And Long Register with Memory 8979 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8980 match(Set dst (AndL dst (LoadL mem))); 8981 effect(KILL cr); 8982 ins_cost(125); 8983 format %{ "AND $dst.lo,$mem\n\t" 8984 "AND $dst.hi,$mem+4" %} 8985 opcode(0x23, 0x23); 8986 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8987 ins_pipe( ialu_reg_long_mem ); 8988 %} 8989 8990 // BMI1 instructions 8991 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 8992 match(Set dst (AndL (XorL src1 minus_1) src2)); 8993 predicate(UseBMI1Instructions); 8994 effect(KILL cr, TEMP dst); 8995 8996 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 8997 "ANDNL $dst.hi, $src1.hi, $src2.hi" 8998 %} 8999 9000 ins_encode %{ 9001 Register Rdst = $dst$$Register; 9002 Register Rsrc1 = $src1$$Register; 9003 Register Rsrc2 = $src2$$Register; 9004 __ andnl(Rdst, Rsrc1, Rsrc2); 9005 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 9006 %} 9007 ins_pipe(ialu_reg_reg_long); 9008 %} 9009 9010 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, 
immL_M1 minus_1, eFlagsReg cr) %{ 9011 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 9012 predicate(UseBMI1Instructions); 9013 effect(KILL cr, TEMP dst); 9014 9015 ins_cost(125); 9016 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 9017 "ANDNL $dst.hi, $src1.hi, $src2+4" 9018 %} 9019 9020 ins_encode %{ 9021 Register Rdst = $dst$$Register; 9022 Register Rsrc1 = $src1$$Register; 9023 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 9024 9025 __ andnl(Rdst, Rsrc1, $src2$$Address); 9026 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); 9027 %} 9028 ins_pipe(ialu_reg_mem); 9029 %} 9030 9031 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 9032 match(Set dst (AndL (SubL imm_zero src) src)); 9033 predicate(UseBMI1Instructions); 9034 effect(KILL cr, TEMP dst); 9035 9036 format %{ "MOVL $dst.hi, 0\n\t" 9037 "BLSIL $dst.lo, $src.lo\n\t" 9038 "JNZ done\n\t" 9039 "BLSIL $dst.hi, $src.hi\n" 9040 "done:" 9041 %} 9042 9043 ins_encode %{ 9044 Label done; 9045 Register Rdst = $dst$$Register; 9046 Register Rsrc = $src$$Register; 9047 __ movl(HIGH_FROM_LOW(Rdst), 0); 9048 __ blsil(Rdst, Rsrc); 9049 __ jccb(Assembler::notZero, done); 9050 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9051 __ bind(done); 9052 %} 9053 ins_pipe(ialu_reg); 9054 %} 9055 9056 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 9057 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 9058 predicate(UseBMI1Instructions); 9059 effect(KILL cr, TEMP dst); 9060 9061 ins_cost(125); 9062 format %{ "MOVL $dst.hi, 0\n\t" 9063 "BLSIL $dst.lo, $src\n\t" 9064 "JNZ done\n\t" 9065 "BLSIL $dst.hi, $src+4\n" 9066 "done:" 9067 %} 9068 9069 ins_encode %{ 9070 Label done; 9071 Register Rdst = $dst$$Register; 9072 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9073 9074 __ movl(HIGH_FROM_LOW(Rdst), 0); 9075 
__ blsil(Rdst, $src$$Address); 9076 __ jccb(Assembler::notZero, done); 9077 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 9078 __ bind(done); 9079 %} 9080 ins_pipe(ialu_reg_mem); 9081 %} 9082 9083 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9084 %{ 9085 match(Set dst (XorL (AddL src minus_1) src)); 9086 predicate(UseBMI1Instructions); 9087 effect(KILL cr, TEMP dst); 9088 9089 format %{ "MOVL $dst.hi, 0\n\t" 9090 "BLSMSKL $dst.lo, $src.lo\n\t" 9091 "JNC done\n\t" 9092 "BLSMSKL $dst.hi, $src.hi\n" 9093 "done:" 9094 %} 9095 9096 ins_encode %{ 9097 Label done; 9098 Register Rdst = $dst$$Register; 9099 Register Rsrc = $src$$Register; 9100 __ movl(HIGH_FROM_LOW(Rdst), 0); 9101 __ blsmskl(Rdst, Rsrc); 9102 __ jccb(Assembler::carryClear, done); 9103 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9104 __ bind(done); 9105 %} 9106 9107 ins_pipe(ialu_reg); 9108 %} 9109 9110 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9111 %{ 9112 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 9113 predicate(UseBMI1Instructions); 9114 effect(KILL cr, TEMP dst); 9115 9116 ins_cost(125); 9117 format %{ "MOVL $dst.hi, 0\n\t" 9118 "BLSMSKL $dst.lo, $src\n\t" 9119 "JNC done\n\t" 9120 "BLSMSKL $dst.hi, $src+4\n" 9121 "done:" 9122 %} 9123 9124 ins_encode %{ 9125 Label done; 9126 Register Rdst = $dst$$Register; 9127 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9128 9129 __ movl(HIGH_FROM_LOW(Rdst), 0); 9130 __ blsmskl(Rdst, $src$$Address); 9131 __ jccb(Assembler::carryClear, done); 9132 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 9133 __ bind(done); 9134 %} 9135 9136 ins_pipe(ialu_reg_mem); 9137 %} 9138 9139 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9140 %{ 9141 match(Set dst (AndL (AddL src minus_1) src) ); 9142 predicate(UseBMI1Instructions); 9143 effect(KILL cr, TEMP dst); 9144 9145 format %{ "MOVL $dst.hi, $src.hi\n\t" 
9146 "BLSRL $dst.lo, $src.lo\n\t" 9147 "JNC done\n\t" 9148 "BLSRL $dst.hi, $src.hi\n" 9149 "done:" 9150 %} 9151 9152 ins_encode %{ 9153 Label done; 9154 Register Rdst = $dst$$Register; 9155 Register Rsrc = $src$$Register; 9156 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9157 __ blsrl(Rdst, Rsrc); 9158 __ jccb(Assembler::carryClear, done); 9159 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9160 __ bind(done); 9161 %} 9162 9163 ins_pipe(ialu_reg); 9164 %} 9165 9166 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9167 %{ 9168 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); 9169 predicate(UseBMI1Instructions); 9170 effect(KILL cr, TEMP dst); 9171 9172 ins_cost(125); 9173 format %{ "MOVL $dst.hi, $src+4\n\t" 9174 "BLSRL $dst.lo, $src\n\t" 9175 "JNC done\n\t" 9176 "BLSRL $dst.hi, $src+4\n" 9177 "done:" 9178 %} 9179 9180 ins_encode %{ 9181 Label done; 9182 Register Rdst = $dst$$Register; 9183 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9184 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9185 __ blsrl(Rdst, $src$$Address); 9186 __ jccb(Assembler::carryClear, done); 9187 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9188 __ bind(done); 9189 %} 9190 9191 ins_pipe(ialu_reg_mem); 9192 %} 9193 9194 // Or Long Register with Register 9195 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9196 match(Set dst (OrL dst src)); 9197 effect(KILL cr); 9198 format %{ "OR $dst.lo,$src.lo\n\t" 9199 "OR $dst.hi,$src.hi" %} 9200 opcode(0x0B,0x0B); 9201 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9202 ins_pipe( ialu_reg_reg_long ); 9203 %} 9204 9205 // Or Long Register with Immediate 9206 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9207 match(Set dst (OrL dst src)); 9208 effect(KILL cr); 9209 format %{ "OR $dst.lo,$src.lo\n\t" 9210 "OR $dst.hi,$src.hi" %} 9211 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9212 ins_encode( Long_OpcSErm_Lo( dst, src ), 
Long_OpcSErm_Hi( dst, src ) ); 9213 ins_pipe( ialu_reg_long ); 9214 %} 9215 9216 // Or Long Register with Memory 9217 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9218 match(Set dst (OrL dst (LoadL mem))); 9219 effect(KILL cr); 9220 ins_cost(125); 9221 format %{ "OR $dst.lo,$mem\n\t" 9222 "OR $dst.hi,$mem+4" %} 9223 opcode(0x0B,0x0B); 9224 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9225 ins_pipe( ialu_reg_long_mem ); 9226 %} 9227 9228 // Xor Long Register with Register 9229 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9230 match(Set dst (XorL dst src)); 9231 effect(KILL cr); 9232 format %{ "XOR $dst.lo,$src.lo\n\t" 9233 "XOR $dst.hi,$src.hi" %} 9234 opcode(0x33,0x33); 9235 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9236 ins_pipe( ialu_reg_reg_long ); 9237 %} 9238 9239 // Xor Long Register with Immediate -1 9240 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9241 match(Set dst (XorL dst imm)); 9242 format %{ "NOT $dst.lo\n\t" 9243 "NOT $dst.hi" %} 9244 ins_encode %{ 9245 __ notl($dst$$Register); 9246 __ notl(HIGH_FROM_LOW($dst$$Register)); 9247 %} 9248 ins_pipe( ialu_reg_long ); 9249 %} 9250 9251 // Xor Long Register with Immediate 9252 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9253 match(Set dst (XorL dst src)); 9254 effect(KILL cr); 9255 format %{ "XOR $dst.lo,$src.lo\n\t" 9256 "XOR $dst.hi,$src.hi" %} 9257 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9258 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9259 ins_pipe( ialu_reg_long ); 9260 %} 9261 9262 // Xor Long Register with Memory 9263 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9264 match(Set dst (XorL dst (LoadL mem))); 9265 effect(KILL cr); 9266 ins_cost(125); 9267 format %{ "XOR $dst.lo,$mem\n\t" 9268 "XOR $dst.hi,$mem+4" %} 9269 opcode(0x33,0x33); 9270 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9271 ins_pipe( ialu_reg_long_mem ); 
%}

// Shift Left Long by 1
// A 64-bit left shift by one on 32-bit x86: ADD of the low word to itself
// sets CF to the bit shifted out, and the following ADC folds that carry
// into the doubled high word.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
// Two ADD/ADC pairs, i.e. two 64-bit shift-by-one steps (see shlL_eReg_1).
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
// Three ADD/ADC pairs, i.e. three 64-bit shift-by-one steps (see shlL_eReg_1).
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left
Long by 1-31 9332 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9333 match(Set dst (LShiftL dst cnt)); 9334 effect(KILL cr); 9335 ins_cost(200); 9336 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9337 "SHL $dst.lo,$cnt" %} 9338 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9339 ins_encode( move_long_small_shift(dst,cnt) ); 9340 ins_pipe( ialu_reg_long ); 9341 %} 9342 9343 // Shift Left Long by 32-63 9344 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9345 match(Set dst (LShiftL dst cnt)); 9346 effect(KILL cr); 9347 ins_cost(300); 9348 format %{ "MOV $dst.hi,$dst.lo\n" 9349 "\tSHL $dst.hi,$cnt-32\n" 9350 "\tXOR $dst.lo,$dst.lo" %} 9351 opcode(0xC1, 0x4); /* C1 /4 ib */ 9352 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9353 ins_pipe( ialu_reg_long ); 9354 %} 9355 9356 // Shift Left Long by variable 9357 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9358 match(Set dst (LShiftL dst shift)); 9359 effect(KILL cr); 9360 ins_cost(500+200); 9361 size(17); 9362 format %{ "TEST $shift,32\n\t" 9363 "JEQ,s small\n\t" 9364 "MOV $dst.hi,$dst.lo\n\t" 9365 "XOR $dst.lo,$dst.lo\n" 9366 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9367 "SHL $dst.lo,$shift" %} 9368 ins_encode( shift_left_long( dst, shift ) ); 9369 ins_pipe( pipe_slow ); 9370 %} 9371 9372 // Shift Right Long by 1-31 9373 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9374 match(Set dst (URShiftL dst cnt)); 9375 effect(KILL cr); 9376 ins_cost(200); 9377 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9378 "SHR $dst.hi,$cnt" %} 9379 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9380 ins_encode( move_long_small_shift(dst,cnt) ); 9381 ins_pipe( ialu_reg_long ); 9382 %} 9383 9384 // Shift Right Long by 32-63 9385 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9386 match(Set dst (URShiftL dst cnt)); 9387 effect(KILL cr); 9388 ins_cost(300); 9389 format %{ "MOV $dst.lo,$dst.hi\n" 9390 "\tSHR $dst.lo,$cnt-32\n" 9391 "\tXOR 
$dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
// Run-time count lives in ECX. TEST of bit 5 decides whether the count is
// >= 32: if so, the high word is moved down and cleared before the common
// SHRD/SHR tail handles the remaining (count mod 32) bits.
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
// SHRD shifts bits from the high word into the low word; SAR (arithmetic)
// on the high word preserves the sign, unlike the SHR used by URShiftL.
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
// Count >= 32: move hi to lo, arithmetic-shift lo by (count-32), and
// fill hi with the sign via SAR by 31.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
// Same >=32 dispatch as shrL_eReg_CL, but the big-shift path sign-extends
// the high word (SAR $dst.hi,31) instead of zeroing it.
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

9455 //----------Double Instructions------------------------------------------------ 9456 // Double Math 9457 9458 // Compare & branch 9459 9460 // P6 version of float compare, sets condition codes in EFLAGS 9461 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9462 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9463 match(Set cr (CmpD src1 src2)); 9464 effect(KILL rax); 9465 ins_cost(150); 9466 format %{ "FLD $src1\n\t" 9467 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9468 "JNP exit\n\t" 9469 "MOV ah,1 // saw a NaN, set CF\n\t" 9470 "SAHF\n" 9471 "exit:\tNOP // avoid branch to branch" %} 9472 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9473 ins_encode( Push_Reg_DPR(src1), 9474 OpcP, RegOpc(src2), 9475 cmpF_P6_fixup ); 9476 ins_pipe( pipe_slow ); 9477 %} 9478 9479 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9480 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9481 match(Set cr (CmpD src1 src2)); 9482 ins_cost(150); 9483 format %{ "FLD $src1\n\t" 9484 "FUCOMIP ST,$src2 // P6 instruction" %} 9485 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9486 ins_encode( Push_Reg_DPR(src1), 9487 OpcP, RegOpc(src2)); 9488 ins_pipe( pipe_slow ); 9489 %} 9490 9491 // Compare & branch 9492 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9493 predicate(UseSSE<=1); 9494 match(Set cr (CmpD src1 src2)); 9495 effect(KILL rax); 9496 ins_cost(200); 9497 format %{ "FLD $src1\n\t" 9498 "FCOMp $src2\n\t" 9499 "FNSTSW AX\n\t" 9500 "TEST AX,0x400\n\t" 9501 "JZ,s flags\n\t" 9502 "MOV AH,1\t# unordered treat as LT\n" 9503 "flags:\tSAHF" %} 9504 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9505 ins_encode( Push_Reg_DPR(src1), 9506 OpcP, RegOpc(src2), 9507 fpu_flags); 9508 ins_pipe( pipe_slow ); 9509 %} 9510 9511 // Compare vs zero into -1,0,1 9512 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9513 predicate(UseSSE<=1); 9514 match(Set dst (CmpD3 src1 zero)); 9515 effect(KILL 
cr, KILL rax); 9516 ins_cost(280); 9517 format %{ "FTSTD $dst,$src1" %} 9518 opcode(0xE4, 0xD9); 9519 ins_encode( Push_Reg_DPR(src1), 9520 OpcS, OpcP, PopFPU, 9521 CmpF_Result(dst)); 9522 ins_pipe( pipe_slow ); 9523 %} 9524 9525 // Compare into -1,0,1 9526 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9527 predicate(UseSSE<=1); 9528 match(Set dst (CmpD3 src1 src2)); 9529 effect(KILL cr, KILL rax); 9530 ins_cost(300); 9531 format %{ "FCMPD $dst,$src1,$src2" %} 9532 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9533 ins_encode( Push_Reg_DPR(src1), 9534 OpcP, RegOpc(src2), 9535 CmpF_Result(dst)); 9536 ins_pipe( pipe_slow ); 9537 %} 9538 9539 // float compare and set condition codes in EFLAGS by XMM regs 9540 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9541 predicate(UseSSE>=2); 9542 match(Set cr (CmpD src1 src2)); 9543 ins_cost(145); 9544 format %{ "UCOMISD $src1,$src2\n\t" 9545 "JNP,s exit\n\t" 9546 "PUSHF\t# saw NaN, set CF\n\t" 9547 "AND [rsp], #0xffffff2b\n\t" 9548 "POPF\n" 9549 "exit:" %} 9550 ins_encode %{ 9551 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9552 emit_cmpfp_fixup(_masm); 9553 %} 9554 ins_pipe( pipe_slow ); 9555 %} 9556 9557 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9558 predicate(UseSSE>=2); 9559 match(Set cr (CmpD src1 src2)); 9560 ins_cost(100); 9561 format %{ "UCOMISD $src1,$src2" %} 9562 ins_encode %{ 9563 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9564 %} 9565 ins_pipe( pipe_slow ); 9566 %} 9567 9568 // float compare and set condition codes in EFLAGS by XMM regs 9569 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9570 predicate(UseSSE>=2); 9571 match(Set cr (CmpD src1 (LoadD src2))); 9572 ins_cost(145); 9573 format %{ "UCOMISD $src1,$src2\n\t" 9574 "JNP,s exit\n\t" 9575 "PUSHF\t# saw NaN, set CF\n\t" 9576 "AND [rsp], #0xffffff2b\n\t" 9577 "POPF\n" 9578 "exit:" %} 9579 ins_encode %{ 9580 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9581 
emit_cmpfp_fixup(_masm); 9582 %} 9583 ins_pipe( pipe_slow ); 9584 %} 9585 9586 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9587 predicate(UseSSE>=2); 9588 match(Set cr (CmpD src1 (LoadD src2))); 9589 ins_cost(100); 9590 format %{ "UCOMISD $src1,$src2" %} 9591 ins_encode %{ 9592 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9593 %} 9594 ins_pipe( pipe_slow ); 9595 %} 9596 9597 // Compare into -1,0,1 in XMM 9598 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9599 predicate(UseSSE>=2); 9600 match(Set dst (CmpD3 src1 src2)); 9601 effect(KILL cr); 9602 ins_cost(255); 9603 format %{ "UCOMISD $src1, $src2\n\t" 9604 "MOV $dst, #-1\n\t" 9605 "JP,s done\n\t" 9606 "JB,s done\n\t" 9607 "SETNE $dst\n\t" 9608 "MOVZB $dst, $dst\n" 9609 "done:" %} 9610 ins_encode %{ 9611 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9612 emit_cmpfp3(_masm, $dst$$Register); 9613 %} 9614 ins_pipe( pipe_slow ); 9615 %} 9616 9617 // Compare into -1,0,1 in XMM and memory 9618 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9619 predicate(UseSSE>=2); 9620 match(Set dst (CmpD3 src1 (LoadD src2))); 9621 effect(KILL cr); 9622 ins_cost(275); 9623 format %{ "UCOMISD $src1, $src2\n\t" 9624 "MOV $dst, #-1\n\t" 9625 "JP,s done\n\t" 9626 "JB,s done\n\t" 9627 "SETNE $dst\n\t" 9628 "MOVZB $dst, $dst\n" 9629 "done:" %} 9630 ins_encode %{ 9631 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9632 emit_cmpfp3(_masm, $dst$$Register); 9633 %} 9634 ins_pipe( pipe_slow ); 9635 %} 9636 9637 9638 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9639 predicate (UseSSE <=1); 9640 match(Set dst (SubD dst src)); 9641 9642 format %{ "FLD $src\n\t" 9643 "DSUBp $dst,ST" %} 9644 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9645 ins_cost(150); 9646 ins_encode( Push_Reg_DPR(src), 9647 OpcP, RegOpc(dst) ); 9648 ins_pipe( fpu_reg_reg ); 9649 %} 9650 9651 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9652 predicate (UseSSE <=1); 9653 
match(Set dst (RoundDouble (SubD src1 src2))); 9654 ins_cost(250); 9655 9656 format %{ "FLD $src2\n\t" 9657 "DSUB ST,$src1\n\t" 9658 "FSTP_D $dst\t# D-round" %} 9659 opcode(0xD8, 0x5); 9660 ins_encode( Push_Reg_DPR(src2), 9661 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9662 ins_pipe( fpu_mem_reg_reg ); 9663 %} 9664 9665 9666 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9667 predicate (UseSSE <=1); 9668 match(Set dst (SubD dst (LoadD src))); 9669 ins_cost(150); 9670 9671 format %{ "FLD $src\n\t" 9672 "DSUBp $dst,ST" %} 9673 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9674 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9675 OpcP, RegOpc(dst) ); 9676 ins_pipe( fpu_reg_mem ); 9677 %} 9678 9679 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9680 predicate (UseSSE<=1); 9681 match(Set dst (AbsD src)); 9682 ins_cost(100); 9683 format %{ "FABS" %} 9684 opcode(0xE1, 0xD9); 9685 ins_encode( OpcS, OpcP ); 9686 ins_pipe( fpu_reg_reg ); 9687 %} 9688 9689 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9690 predicate(UseSSE<=1); 9691 match(Set dst (NegD src)); 9692 ins_cost(100); 9693 format %{ "FCHS" %} 9694 opcode(0xE0, 0xD9); 9695 ins_encode( OpcS, OpcP ); 9696 ins_pipe( fpu_reg_reg ); 9697 %} 9698 9699 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9700 predicate(UseSSE<=1); 9701 match(Set dst (AddD dst src)); 9702 format %{ "FLD $src\n\t" 9703 "DADD $dst,ST" %} 9704 size(4); 9705 ins_cost(150); 9706 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9707 ins_encode( Push_Reg_DPR(src), 9708 OpcP, RegOpc(dst) ); 9709 ins_pipe( fpu_reg_reg ); 9710 %} 9711 9712 9713 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9714 predicate(UseSSE<=1); 9715 match(Set dst (RoundDouble (AddD src1 src2))); 9716 ins_cost(250); 9717 9718 format %{ "FLD $src2\n\t" 9719 "DADD ST,$src1\n\t" 9720 "FSTP_D $dst\t# D-round" %} 9721 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9722 ins_encode( Push_Reg_DPR(src2), 9723 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9724 ins_pipe( 
fpu_mem_reg_reg ); 9725 %} 9726 9727 9728 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9729 predicate(UseSSE<=1); 9730 match(Set dst (AddD dst (LoadD src))); 9731 ins_cost(150); 9732 9733 format %{ "FLD $src\n\t" 9734 "DADDp $dst,ST" %} 9735 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9736 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9737 OpcP, RegOpc(dst) ); 9738 ins_pipe( fpu_reg_mem ); 9739 %} 9740 9741 // add-to-memory 9742 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9743 predicate(UseSSE<=1); 9744 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9745 ins_cost(150); 9746 9747 format %{ "FLD_D $dst\n\t" 9748 "DADD ST,$src\n\t" 9749 "FST_D $dst" %} 9750 opcode(0xDD, 0x0); 9751 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9752 Opcode(0xD8), RegOpc(src), 9753 set_instruction_start, 9754 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9755 ins_pipe( fpu_reg_mem ); 9756 %} 9757 9758 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9759 predicate(UseSSE<=1); 9760 match(Set dst (AddD dst con)); 9761 ins_cost(125); 9762 format %{ "FLD1\n\t" 9763 "DADDp $dst,ST" %} 9764 ins_encode %{ 9765 __ fld1(); 9766 __ faddp($dst$$reg); 9767 %} 9768 ins_pipe(fpu_reg); 9769 %} 9770 9771 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9772 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9773 match(Set dst (AddD dst con)); 9774 ins_cost(200); 9775 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9776 "DADDp $dst,ST" %} 9777 ins_encode %{ 9778 __ fld_d($constantaddress($con)); 9779 __ faddp($dst$$reg); 9780 %} 9781 ins_pipe(fpu_reg_mem); 9782 %} 9783 9784 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9785 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9786 match(Set dst (RoundDouble (AddD src con))); 9787 ins_cost(200); 9788 format %{ "FLD_D [$constantaddress]\t# load from 
constant table: double=$con\n\t" 9789 "DADD ST,$src\n\t" 9790 "FSTP_D $dst\t# D-round" %} 9791 ins_encode %{ 9792 __ fld_d($constantaddress($con)); 9793 __ fadd($src$$reg); 9794 __ fstp_d(Address(rsp, $dst$$disp)); 9795 %} 9796 ins_pipe(fpu_mem_reg_con); 9797 %} 9798 9799 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9800 predicate(UseSSE<=1); 9801 match(Set dst (MulD dst src)); 9802 format %{ "FLD $src\n\t" 9803 "DMULp $dst,ST" %} 9804 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9805 ins_cost(150); 9806 ins_encode( Push_Reg_DPR(src), 9807 OpcP, RegOpc(dst) ); 9808 ins_pipe( fpu_reg_reg ); 9809 %} 9810 9811 // Strict FP instruction biases argument before multiply then 9812 // biases result to avoid double rounding of subnormals. 9813 // 9814 // scale arg1 by multiplying arg1 by 2^(-15360) 9815 // load arg2 9816 // multiply scaled arg1 by arg2 9817 // rescale product by 2^(15360) 9818 // 9819 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9820 predicate( UseSSE<=1 && Compile::current()->has_method() ); 9821 match(Set dst (MulD dst src)); 9822 ins_cost(1); // Select this instruction for all FP double multiplies 9823 9824 format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t" 9825 "DMULp $dst,ST\n\t" 9826 "FLD $src\n\t" 9827 "DMULp $dst,ST\n\t" 9828 "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t" 9829 "DMULp $dst,ST\n\t" %} 9830 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9831 ins_encode( strictfp_bias1(dst), 9832 Push_Reg_DPR(src), 9833 OpcP, RegOpc(dst), 9834 strictfp_bias2(dst) ); 9835 ins_pipe( fpu_reg_reg ); 9836 %} 9837 9838 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9839 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9840 match(Set dst (MulD dst con)); 9841 ins_cost(200); 9842 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9843 "DMULp $dst,ST" %} 9844 ins_encode %{ 9845 __ fld_d($constantaddress($con)); 9846 __ fmulp($dst$$reg); 9847 %} 9848 
ins_pipe(fpu_reg_mem); 9849 %} 9850 9851 9852 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9853 predicate( UseSSE<=1 ); 9854 match(Set dst (MulD dst (LoadD src))); 9855 ins_cost(200); 9856 format %{ "FLD_D $src\n\t" 9857 "DMULp $dst,ST" %} 9858 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9859 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9860 OpcP, RegOpc(dst) ); 9861 ins_pipe( fpu_reg_mem ); 9862 %} 9863 9864 // 9865 // Cisc-alternate to reg-reg multiply 9866 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ 9867 predicate( UseSSE<=1 ); 9868 match(Set dst (MulD src (LoadD mem))); 9869 ins_cost(250); 9870 format %{ "FLD_D $mem\n\t" 9871 "DMUL ST,$src\n\t" 9872 "FSTP_D $dst" %} 9873 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9874 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9875 OpcReg_FPR(src), 9876 Pop_Reg_DPR(dst) ); 9877 ins_pipe( fpu_reg_reg_mem ); 9878 %} 9879 9880 9881 // MACRO3 -- addDPR a mulDPR 9882 // This instruction is a '2-address' instruction in that the result goes 9883 // back to src2. This eliminates a move from the macro; possibly the 9884 // register allocator will have to add it back (and maybe not). 
9885 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9886 predicate( UseSSE<=1 ); 9887 match(Set src2 (AddD (MulD src0 src1) src2)); 9888 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9889 "DMUL ST,$src1\n\t" 9890 "DADDp $src2,ST" %} 9891 ins_cost(250); 9892 opcode(0xDD); /* LoadD DD /0 */ 9893 ins_encode( Push_Reg_FPR(src0), 9894 FMul_ST_reg(src1), 9895 FAddP_reg_ST(src2) ); 9896 ins_pipe( fpu_reg_reg_reg ); 9897 %} 9898 9899 9900 // MACRO3 -- subDPR a mulDPR 9901 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9902 predicate( UseSSE<=1 ); 9903 match(Set src2 (SubD (MulD src0 src1) src2)); 9904 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9905 "DMUL ST,$src1\n\t" 9906 "DSUBRp $src2,ST" %} 9907 ins_cost(250); 9908 ins_encode( Push_Reg_FPR(src0), 9909 FMul_ST_reg(src1), 9910 Opcode(0xDE), Opc_plus(0xE0,src2)); 9911 ins_pipe( fpu_reg_reg_reg ); 9912 %} 9913 9914 9915 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9916 predicate( UseSSE<=1 ); 9917 match(Set dst (DivD dst src)); 9918 9919 format %{ "FLD $src\n\t" 9920 "FDIVp $dst,ST" %} 9921 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9922 ins_cost(150); 9923 ins_encode( Push_Reg_DPR(src), 9924 OpcP, RegOpc(dst) ); 9925 ins_pipe( fpu_reg_reg ); 9926 %} 9927 9928 // Strict FP instruction biases argument before division then 9929 // biases result, to avoid double rounding of subnormals. 
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
// FIX: this instruct carried two predicate clauses — a redundant
// "predicate (UseSSE<=1);" before match() in addition to the full one.
// An instruct takes a single predicate; keep only the combined form,
// matching the parallel strictfp_mulDPR_reg.
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all FP double divides

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// x87 double remainder (UseSSE<=1): operands stay on the FPU stack.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// XMM double remainder (UseSSE>=2): spill both operands through the stack
// to the x87 unit, loop on FPREM until C2 clears, then move the result back.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
          "\tMOVSD [ESP+0],$src1\n"
          "\tFLD_D [ESP+0]\n"
          "\tMOVSD [ESP+0],$src0\n"
          "\tFLD_D [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD $dst,[ESP+0]\n"
          "\tADD ESP,8\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

instruct atanDPR_reg(regDPR dst,
regDPR src) %{ 9996 predicate (UseSSE<=1); 9997 match(Set dst(AtanD dst src)); 9998 format %{ "DATA $dst,$src" %} 9999 opcode(0xD9, 0xF3); 10000 ins_encode( Push_Reg_DPR(src), 10001 OpcP, OpcS, RegOpc(dst) ); 10002 ins_pipe( pipe_slow ); 10003 %} 10004 10005 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 10006 predicate (UseSSE>=2); 10007 match(Set dst(AtanD dst src)); 10008 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 10009 format %{ "DATA $dst,$src" %} 10010 opcode(0xD9, 0xF3); 10011 ins_encode( Push_SrcD(src), 10012 OpcP, OpcS, Push_ResultD(dst) ); 10013 ins_pipe( pipe_slow ); 10014 %} 10015 10016 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 10017 predicate (UseSSE<=1); 10018 match(Set dst (SqrtD src)); 10019 format %{ "DSQRT $dst,$src" %} 10020 opcode(0xFA, 0xD9); 10021 ins_encode( Push_Reg_DPR(src), 10022 OpcS, OpcP, Pop_Reg_DPR(dst) ); 10023 ins_pipe( pipe_slow ); 10024 %} 10025 10026 //-------------Float Instructions------------------------------- 10027 // Float Math 10028 10029 // Code for float compare: 10030 // fcompp(); 10031 // fwait(); fnstsw_ax(); 10032 // sahf(); 10033 // movl(dst, unordered_result); 10034 // jcc(Assembler::parity, exit); 10035 // movl(dst, less_result); 10036 // jcc(Assembler::below, exit); 10037 // movl(dst, equal_result); 10038 // jcc(Assembler::equal, exit); 10039 // movl(dst, greater_result); 10040 // exit: 10041 10042 // P6 version of float compare, sets condition codes in EFLAGS 10043 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10044 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10045 match(Set cr (CmpF src1 src2)); 10046 effect(KILL rax); 10047 ins_cost(150); 10048 format %{ "FLD $src1\n\t" 10049 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10050 "JNP exit\n\t" 10051 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10052 "SAHF\n" 10053 "exit:\tNOP // avoid branch to branch" %} 10054 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10055 ins_encode( 
Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// P6 variant for the unordered-compare flags register (eFlagsRegUCF):
// FUCOMIP sets EFLAGS directly from the FPU compare, so no NaN fixup
// pass (cmpF_P6_fixup) is required here.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
// Pre-P6 path: FCOMp + FNSTSW/SAHF moves the FPU status word through AX
// into EFLAGS (hence KILL rax); an unordered result is rewritten to "LT".
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// UCF-flags version: consumers tolerate the raw UCOMISS result, so the
// post-compare NaN fixup is skipped.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3
src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// FABS operates implicitly on the FPU top-of-stack (both operands are
// constrained to regFPR1).
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// FCHS likewise operates on the FPU top-of-stack only.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    // NOTE(review): $src$$reg - 1 converts the allocator's FPR numbering
    // to an FPU stack index, per the "FLD ST(i-1)" comment below.
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder: the operands are bounced through the stack onto the
// FPU so the FPREM loop in emitModDPR() can be reused.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
          "\tMOVSS [ESP+0],$src1\n"
          "\tFLD_S [ESP+0]\n"
          "\tMOVSS [ESP+0],$src0\n"
          "\tFLD_S [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS $dst,[ESP+0]\n"
          "\tADD ESP,4\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!

instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // If src is already at the top of the FPU stack (FPR1), a plain
    // non-popping store suffices; otherwise load it to TOS first.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSD2SI yields 0x80000000 on overflow/NaN; only then take the
    // slow path through d2i_wrapper to get the Java corner-case result.
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // EDX:EAX == 0x80000000:00000000 is the FIST "indefinite" marker for
    // overflow/NaN -- only then call the slow-path wrapper.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp,
0), $src$$XMMRegister); 10881 __ fld_d(Address(rsp, 0)); 10882 __ addptr(rsp, 8); 10883 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper()))); 10884 __ post_call_nop(); 10885 __ bind(fast); 10886 %} 10887 ins_pipe( pipe_slow ); 10888 %} 10889 10890 // Convert a double to an int. Java semantics require we do complex 10891 // manglations in the corner cases. So we set the rounding mode to 10892 // 'zero', store the darned double down as an int, and reset the 10893 // rounding mode to 'nearest'. The hardware stores a flag value down 10894 // if we would overflow or converted a NAN; we check for this and 10895 // and go the slow path if needed. 10896 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ 10897 predicate(UseSSE==0); 10898 match(Set dst (ConvF2I src)); 10899 effect( KILL tmp, KILL cr ); 10900 format %{ "FLD $src\t# Convert float to int \n\t" 10901 "FLDCW trunc mode\n\t" 10902 "SUB ESP,4\n\t" 10903 "FISTp [ESP + #0]\n\t" 10904 "FLDCW std/24-bit mode\n\t" 10905 "POP EAX\n\t" 10906 "CMP EAX,0x80000000\n\t" 10907 "JNE,s fast\n\t" 10908 "FLD $src\n\t" 10909 "CALL d2i_wrapper\n" 10910 "fast:" %} 10911 // DPR2I_encoding works for FPR2I 10912 ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); 10913 ins_pipe( pipe_slow ); 10914 %} 10915 10916 // Convert a float in xmm to an int reg. 
10917 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ 10918 predicate(UseSSE>=1); 10919 match(Set dst (ConvF2I src)); 10920 effect( KILL tmp, KILL cr ); 10921 format %{ "CVTTSS2SI $dst, $src\n\t" 10922 "CMP $dst,0x80000000\n\t" 10923 "JNE,s fast\n\t" 10924 "SUB ESP, 4\n\t" 10925 "MOVSS [ESP], $src\n\t" 10926 "FLD [ESP]\n\t" 10927 "ADD ESP, 4\n\t" 10928 "CALL d2i_wrapper\n" 10929 "fast:" %} 10930 ins_encode %{ 10931 Label fast; 10932 __ cvttss2sil($dst$$Register, $src$$XMMRegister); 10933 __ cmpl($dst$$Register, 0x80000000); 10934 __ jccb(Assembler::notEqual, fast); 10935 __ subptr(rsp, 4); 10936 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10937 __ fld_s(Address(rsp, 0)); 10938 __ addptr(rsp, 4); 10939 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper()))); 10940 __ post_call_nop(); 10941 __ bind(fast); 10942 %} 10943 ins_pipe( pipe_slow ); 10944 %} 10945 10946 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ 10947 predicate(UseSSE==0); 10948 match(Set dst (ConvF2L src)); 10949 effect( KILL cr ); 10950 format %{ "FLD $src\t# Convert float to long\n\t" 10951 "FLDCW trunc mode\n\t" 10952 "SUB ESP,8\n\t" 10953 "FISTp [ESP + #0]\n\t" 10954 "FLDCW std/24-bit mode\n\t" 10955 "POP EAX\n\t" 10956 "POP EDX\n\t" 10957 "CMP EDX,0x80000000\n\t" 10958 "JNE,s fast\n\t" 10959 "TEST EAX,EAX\n\t" 10960 "JNE,s fast\n\t" 10961 "FLD $src\n\t" 10962 "CALL d2l_wrapper\n" 10963 "fast:" %} 10964 // DPR2L_encoding works for FPR2L 10965 ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); 10966 ins_pipe( pipe_slow ); 10967 %} 10968 10969 // XMM lacks a float/double->long conversion, so use the old FPU stack. 
10970 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ 10971 predicate (UseSSE>=1); 10972 match(Set dst (ConvF2L src)); 10973 effect( KILL cr ); 10974 format %{ "SUB ESP,8\t# Convert float to long\n\t" 10975 "MOVSS [ESP],$src\n\t" 10976 "FLD_S [ESP]\n\t" 10977 "FLDCW trunc mode\n\t" 10978 "FISTp [ESP + #0]\n\t" 10979 "FLDCW std/24-bit mode\n\t" 10980 "POP EAX\n\t" 10981 "POP EDX\n\t" 10982 "CMP EDX,0x80000000\n\t" 10983 "JNE,s fast\n\t" 10984 "TEST EAX,EAX\n\t" 10985 "JNE,s fast\n\t" 10986 "SUB ESP,4\t# Convert float to long\n\t" 10987 "MOVSS [ESP],$src\n\t" 10988 "FLD_S [ESP]\n\t" 10989 "ADD ESP,4\n\t" 10990 "CALL d2l_wrapper\n" 10991 "fast:" %} 10992 ins_encode %{ 10993 Label fast; 10994 __ subptr(rsp, 8); 10995 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10996 __ fld_s(Address(rsp, 0)); 10997 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc())); 10998 __ fistp_d(Address(rsp, 0)); 10999 // Restore the rounding mode, mask the exception 11000 if (Compile::current()->in_24_bit_fp_mode()) { 11001 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 11002 } else { 11003 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 11004 } 11005 // Load the converted long, adjust CPU stack 11006 __ pop(rax); 11007 __ pop(rdx); 11008 __ cmpl(rdx, 0x80000000); 11009 __ jccb(Assembler::notEqual, fast); 11010 __ testl(rax, rax); 11011 __ jccb(Assembler::notEqual, fast); 11012 __ subptr(rsp, 4); 11013 __ movflt(Address(rsp, 0), $src$$XMMRegister); 11014 __ fld_s(Address(rsp, 0)); 11015 __ addptr(rsp, 4); 11016 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper()))); 11017 __ post_call_nop(); 11018 __ bind(fast); 11019 %} 11020 ins_pipe( pipe_slow ); 11021 %} 11022 11023 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ 11024 predicate( UseSSE<=1 ); 11025 match(Set dst (ConvI2D src)); 11026 format %{ "FILD $src\n\t" 11027 "FSTP $dst" %} 11028 opcode(0xDB, 0x0); /* DB /0 */ 
11029 ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); 11030 ins_pipe( fpu_reg_mem ); 11031 %} 11032 11033 instruct convI2D_reg(regD dst, rRegI src) %{ 11034 predicate( UseSSE>=2 && !UseXmmI2D ); 11035 match(Set dst (ConvI2D src)); 11036 format %{ "CVTSI2SD $dst,$src" %} 11037 ins_encode %{ 11038 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); 11039 %} 11040 ins_pipe( pipe_slow ); 11041 %} 11042 11043 instruct convI2D_mem(regD dst, memory mem) %{ 11044 predicate( UseSSE>=2 ); 11045 match(Set dst (ConvI2D (LoadI mem))); 11046 format %{ "CVTSI2SD $dst,$mem" %} 11047 ins_encode %{ 11048 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); 11049 %} 11050 ins_pipe( pipe_slow ); 11051 %} 11052 11053 instruct convXI2D_reg(regD dst, rRegI src) 11054 %{ 11055 predicate( UseSSE>=2 && UseXmmI2D ); 11056 match(Set dst (ConvI2D src)); 11057 11058 format %{ "MOVD $dst,$src\n\t" 11059 "CVTDQ2PD $dst,$dst\t# i2d" %} 11060 ins_encode %{ 11061 __ movdl($dst$$XMMRegister, $src$$Register); 11062 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); 11063 %} 11064 ins_pipe(pipe_slow); // XXX 11065 %} 11066 11067 instruct convI2DPR_mem(regDPR dst, memory mem) %{ 11068 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); 11069 match(Set dst (ConvI2D (LoadI mem))); 11070 format %{ "FILD $mem\n\t" 11071 "FSTP $dst" %} 11072 opcode(0xDB); /* DB /0 */ 11073 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11074 Pop_Reg_DPR(dst)); 11075 ins_pipe( fpu_reg_mem ); 11076 %} 11077 11078 // Convert a byte to a float; no rounding step needed. 
// byte (int masked with 0xFF) -> float; the value fits in 24 bits of
// mantissa, so no explicit rounding step is needed even in 24-bit mode.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// int -> float staying entirely in the XMM domain (MOVD + CVTDQ2PS).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend convert int to long: copy into both halves, then
// arithmetic-shift the high half to replicate the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// long -> double via x87 FILD of the pushed 64-bit value (no SSE2).
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// long -> double: x87 does the conversion, result moved into an XMM register.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// long -> float: x87 does the conversion, result moved into an XMM register.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// long -> float via x87 only, storing into a stack slot with F-rounding.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// long -> int is just the low half of the register pair.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Raw bit moves between float and int domains (no value conversion).
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);  /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit moves between double and long domains (no value conversion).
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// XMM double -> GPR pair: PSHUFLW swaps the 32-bit halves so the high
// word can be extracted with a second MOVD.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);  /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// GPR pair -> XMM double: two MOVDs combined with PUNPCKLDQ.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//----------------------------- CompressBits/ExpandBits ------------------------

instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src mask));
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    Label exit, partial_result;
    // In parallel, extract both upper and lower 32 bits of source into destination register pair.
    // Merge the results of upper and lower destination registers such that upper destination
    // results are contiguously laid out after the lower destination result.
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
    __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
    __ popcntl($rtmp$$Register, $mask$$Register);
    // Skip merging if bit count of lower mask register is equal to 32 (register size).
    __ cmpl($rtmp$$Register, 32);
    __ jccb(Assembler::equal, exit);
    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift left the contents of upper destination register by true bit count of lower mask register
    // and merge with lower destination register.
    __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ orl($dst$$Register, $rtmp$$Register);
    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    // Zero out upper destination register if true bit count of lower 32 bit mask is zero
    // since contents of upper destination have already been copied to lower destination
    // register.
    __ cmpl($rtmp$$Register, 0);
    __ jccb(Assembler::greater, partial_result);
    __ movl(HIGH_FROM_LOW($dst$$Register), 0);
    __ jmp(exit);
    __ bind(partial_result);
    // Perform right shift over upper destination register to move out bits already copied
    // to lower destination register.
    __ subl($rtmp$$Register, 32);
    __ negl($rtmp$$Register);
    __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ bind(exit);
  %}
  ins_pipe( pipe_slow );
%}

instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src mask));
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    // Extraction operation sequentially reads the bits from source register starting from LSB
    // and lays them out into destination register at bit locations corresponding to true bits
    // in mask register. Thus number of source bits read are equal to combined true bit count
    // of mask register pair.
    Label exit, mask_clipping;
    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
    __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
    __ popcntl($rtmp$$Register, $mask$$Register);
    // If true bit count of lower mask register is 32 then no bits of the lower source register
    // will feed to upper destination register.
    __ cmpl($rtmp$$Register, 32);
    __ jccb(Assembler::equal, exit);
    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift right the contents of lower source register to remove already consumed bits.
    __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
    // Extract the bits from lower source register starting from LSB under the influence
    // of upper mask register.
    __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    __ subl($rtmp$$Register, 32);
    __ negl($rtmp$$Register);
    __ movdl($xtmp$$XMMRegister, $mask$$Register);
    __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
    // Clear the set bits in upper mask register which have been used to extract the contents
    // from lower source register.
    __ bind(mask_clipping);
    __ blsrl($mask$$Register, $mask$$Register);
    __ decrementl($rtmp$$Register, 1);
    __ jccb(Assembler::greater, mask_clipping);
    // Starting from LSB extract the bits from upper source register under the influence of
    // remaining set bits in upper mask register.
    __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
    // Merge the partial results extracted from lower and upper source register bits.
    __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ movdl($mask$$Register, $xtmp$$XMMRegister);
    __ bind(exit);
  %}
  ins_pipe( pipe_slow );
%}

// =======================================================================
// fast clearing of an array
// Small ClearArray non-AVX512.
11579 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11580 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2)); 11581 match(Set dummy (ClearArray cnt base)); 11582 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); 11583 11584 format %{ $$template 11585 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11586 $$emit$$"CMP InitArrayShortSize,rcx\n\t" 11587 $$emit$$"JG LARGE\n\t" 11588 $$emit$$"SHL ECX, 1\n\t" 11589 $$emit$$"DEC ECX\n\t" 11590 $$emit$$"JS DONE\t# Zero length\n\t" 11591 $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t" 11592 $$emit$$"DEC ECX\n\t" 11593 $$emit$$"JGE LOOP\n\t" 11594 $$emit$$"JMP DONE\n\t" 11595 $$emit$$"# LARGE:\n\t" 11596 if (UseFastStosb) { 11597 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11598 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11599 } else if (UseXMMForObjInit) { 11600 $$emit$$"MOV RDI,RAX\n\t" 11601 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11602 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11603 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11604 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11605 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11606 $$emit$$"ADD 0x40,RAX\n\t" 11607 $$emit$$"# L_zero_64_bytes:\n\t" 11608 $$emit$$"SUB 0x8,RCX\n\t" 11609 $$emit$$"JGE L_loop\n\t" 11610 $$emit$$"ADD 0x4,RCX\n\t" 11611 $$emit$$"JL L_tail\n\t" 11612 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11613 $$emit$$"ADD 0x20,RAX\n\t" 11614 $$emit$$"SUB 0x4,RCX\n\t" 11615 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11616 $$emit$$"ADD 0x4,RCX\n\t" 11617 $$emit$$"JLE L_end\n\t" 11618 $$emit$$"DEC RCX\n\t" 11619 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11620 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11621 $$emit$$"ADD 0x8,RAX\n\t" 11622 $$emit$$"DEC RCX\n\t" 11623 $$emit$$"JGE L_sloop\n\t" 11624 $$emit$$"# L_end:\n\t" 11625 } else { 11626 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11627 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11628 } 11629 $$emit$$"# DONE" 11630 %} 11631 
ins_encode %{ 11632 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11633 $tmp$$XMMRegister, false, knoreg); 11634 %} 11635 ins_pipe( pipe_slow ); 11636 %} 11637 11638 // Small ClearArray AVX512 non-constant length. 11639 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11640 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2)); 11641 match(Set dummy (ClearArray cnt base)); 11642 ins_cost(125); 11643 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); 11644 11645 format %{ $$template 11646 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11647 $$emit$$"CMP InitArrayShortSize,rcx\n\t" 11648 $$emit$$"JG LARGE\n\t" 11649 $$emit$$"SHL ECX, 1\n\t" 11650 $$emit$$"DEC ECX\n\t" 11651 $$emit$$"JS DONE\t# Zero length\n\t" 11652 $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t" 11653 $$emit$$"DEC ECX\n\t" 11654 $$emit$$"JGE LOOP\n\t" 11655 $$emit$$"JMP DONE\n\t" 11656 $$emit$$"# LARGE:\n\t" 11657 if (UseFastStosb) { 11658 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11659 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11660 } else if (UseXMMForObjInit) { 11661 $$emit$$"MOV RDI,RAX\n\t" 11662 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11663 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11664 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11665 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11666 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11667 $$emit$$"ADD 0x40,RAX\n\t" 11668 $$emit$$"# L_zero_64_bytes:\n\t" 11669 $$emit$$"SUB 0x8,RCX\n\t" 11670 $$emit$$"JGE L_loop\n\t" 11671 $$emit$$"ADD 0x4,RCX\n\t" 11672 $$emit$$"JL L_tail\n\t" 11673 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11674 $$emit$$"ADD 0x20,RAX\n\t" 11675 $$emit$$"SUB 0x4,RCX\n\t" 11676 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11677 $$emit$$"ADD 0x4,RCX\n\t" 11678 $$emit$$"JLE L_end\n\t" 11679 $$emit$$"DEC RCX\n\t" 11680 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11681 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11682 $$emit$$"ADD 0x8,RAX\n\t" 
11683 $$emit$$"DEC RCX\n\t" 11684 $$emit$$"JGE L_sloop\n\t" 11685 $$emit$$"# L_end:\n\t" 11686 } else { 11687 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11688 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11689 } 11690 $$emit$$"# DONE" 11691 %} 11692 ins_encode %{ 11693 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11694 $tmp$$XMMRegister, false, $ktmp$$KRegister); 11695 %} 11696 ins_pipe( pipe_slow ); 11697 %} 11698 11699 // Large ClearArray non-AVX512. 11700 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11701 predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large()); 11702 match(Set dummy (ClearArray cnt base)); 11703 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); 11704 format %{ $$template 11705 if (UseFastStosb) { 11706 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11707 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11708 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11709 } else if (UseXMMForObjInit) { 11710 $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t" 11711 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11712 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11713 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11714 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11715 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11716 $$emit$$"ADD 0x40,RAX\n\t" 11717 $$emit$$"# L_zero_64_bytes:\n\t" 11718 $$emit$$"SUB 0x8,RCX\n\t" 11719 $$emit$$"JGE L_loop\n\t" 11720 $$emit$$"ADD 0x4,RCX\n\t" 11721 $$emit$$"JL L_tail\n\t" 11722 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11723 $$emit$$"ADD 0x20,RAX\n\t" 11724 $$emit$$"SUB 0x4,RCX\n\t" 11725 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11726 $$emit$$"ADD 0x4,RCX\n\t" 11727 $$emit$$"JLE L_end\n\t" 11728 $$emit$$"DEC RCX\n\t" 11729 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11730 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11731 $$emit$$"ADD 0x8,RAX\n\t" 11732 $$emit$$"DEC RCX\n\t" 11733 $$emit$$"JGE L_sloop\n\t" 11734 $$emit$$"# L_end:\n\t" 11735 } else { 
11736 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11737 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11738 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11739 } 11740 $$emit$$"# DONE" 11741 %} 11742 ins_encode %{ 11743 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11744 $tmp$$XMMRegister, true, knoreg); 11745 %} 11746 ins_pipe( pipe_slow ); 11747 %} 11748 11749 // Large ClearArray AVX512. 11750 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11751 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large()); 11752 match(Set dummy (ClearArray cnt base)); 11753 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); 11754 format %{ $$template 11755 if (UseFastStosb) { 11756 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11757 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11758 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11759 } else if (UseXMMForObjInit) { 11760 $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t" 11761 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11762 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11763 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11764 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11765 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11766 $$emit$$"ADD 0x40,RAX\n\t" 11767 $$emit$$"# L_zero_64_bytes:\n\t" 11768 $$emit$$"SUB 0x8,RCX\n\t" 11769 $$emit$$"JGE L_loop\n\t" 11770 $$emit$$"ADD 0x4,RCX\n\t" 11771 $$emit$$"JL L_tail\n\t" 11772 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11773 $$emit$$"ADD 0x20,RAX\n\t" 11774 $$emit$$"SUB 0x4,RCX\n\t" 11775 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11776 $$emit$$"ADD 0x4,RCX\n\t" 11777 $$emit$$"JLE L_end\n\t" 11778 $$emit$$"DEC RCX\n\t" 11779 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11780 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11781 $$emit$$"ADD 0x8,RAX\n\t" 11782 $$emit$$"DEC RCX\n\t" 11783 $$emit$$"JGE L_sloop\n\t" 11784 $$emit$$"# L_end:\n\t" 11785 } else { 11786 $$emit$$"XOR EAX,EAX\t# 
ClearArray:\n\t" 11787 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11788 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11789 } 11790 $$emit$$"# DONE" 11791 %} 11792 ins_encode %{ 11793 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11794 $tmp$$XMMRegister, true, $ktmp$$KRegister); 11795 %} 11796 ins_pipe( pipe_slow ); 11797 %} 11798 11799 // Small ClearArray AVX512 constant length. 11800 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr) 11801 %{ 11802 predicate(!((ClearArrayNode*)n)->is_large() && 11803 ((UseAVX > 2) && VM_Version::supports_avx512vlbw())); 11804 match(Set dummy (ClearArray cnt base)); 11805 ins_cost(100); 11806 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr); 11807 format %{ "clear_mem_imm $base , $cnt \n\t" %} 11808 ins_encode %{ 11809 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister); 11810 %} 11811 ins_pipe(pipe_slow); 11812 %} 11813 11814 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11815 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11816 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11817 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11818 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11819 11820 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11821 ins_encode %{ 11822 __ string_compare($str1$$Register, $str2$$Register, 11823 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11824 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg); 11825 %} 11826 ins_pipe( pipe_slow ); 11827 %} 11828 11829 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11830 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ 11831 predicate(VM_Version::supports_avx512vlbw() && 
((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11832 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11833 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11834 11835 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11836 ins_encode %{ 11837 __ string_compare($str1$$Register, $str2$$Register, 11838 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11839 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister); 11840 %} 11841 ins_pipe( pipe_slow ); 11842 %} 11843 11844 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11845 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11846 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11847 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11848 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11849 11850 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11851 ins_encode %{ 11852 __ string_compare($str1$$Register, $str2$$Register, 11853 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11854 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg); 11855 %} 11856 ins_pipe( pipe_slow ); 11857 %} 11858 11859 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11860 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ 11861 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11862 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11863 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11864 11865 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11866 ins_encode %{ 11867 __ string_compare($str1$$Register, $str2$$Register, 11868 $cnt1$$Register, 
                      $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, Latin1 string vs UTF-16 string (LU encoding).
// Non-AVX512 path: passes knoreg so the helper skips opmask usage.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// String compare LU, AVX512VL+BW path: supplies a kReg opmask temp.
instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, UTF-16 vs Latin1 (UL): note the operands are passed to
// string_compare() swapped (str2/str1, cnt2/cnt1) relative to the node inputs.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// String compare UL, AVX512VL+BW path (same operand swap as above).
instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals (non-AVX512 path; 'false' first arg = StrEquals, not AryEq)
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe(
pipe_slow );
%}

// String equals, AVX512VL+BW path: supplies a kReg opmask temp.
instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}


// fast search of substring with known size (Latin1/Latin1).
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size (UTF-16/UTF-16).
// Threshold is 8 elements here (chars) vs 16 above (bytes): same 16-byte
// SSE4.2 chunk either way.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Substring search with a runtime (non-constant) substring length, Latin1.
// Passes (-1) as the constant-count argument to select the generic path.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// Substring search with runtime substring length, UTF-16.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// Substring search with runtime substring length, UTF-16 string / Latin1 pattern.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Single-character search in a UTF-16 string.
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                             eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Single-character search in a Latin1 string.
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// fast array equals (byte[], non-AVX512 path; 'true' first arg = AryEq)
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Array equals byte[], AVX512VL+BW path.
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Array equals char[], non-AVX512 path.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI
tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    // 'true /* char */' selects 2-byte element comparison.
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Array equals char[], AVX512VL+BW path.
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Count positive bytes in a byte[]; fallback path when either
// AVX512VL+BW or BMI2 is missing (passes knoreg for both mask temps).
instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                         regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Count positives, AVX512VL+BW and BMI2 path (two opmask temps).
instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
                              regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}


// fast char[] to byte[] compression (fallback when AVX512VL+BW/BMI2 absent)
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                         regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// String compress, AVX512VL+BW + BMI2 path (two opmask temps).
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                              regD tmp3, regD tmp4, kReg ktmp1, kReg
ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation (fallback when AVX512VL+BW/BMI2 absent)
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// String inflate, AVX512VL+BW + BMI2 path (one opmask temp).
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                             regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    // Trailing 'false' selects ISO-8859-1 (not strict-ASCII) checking.
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ASCII
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                            regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                            eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    // Trailing 'true' selects strict-ASCII checking.
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
// Signed integer compare, register-register: sets eFlagsReg.
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Signed integer compare against an immediate.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare with zero via TEST reg,reg (shorter than CMP reg,0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fused (src & con) == 0 test: TEST with immediate.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fused (src & mem) == 0 test.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7); /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
// 32-bit computation of limit = init + stride*ceil((limit-init)/stride)
// using 64-bit intermediates in EAX:EDX; see step comments below.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    int m1 = (strd > 0) ? 1 : -1;
    // NOTE(review): 'm1' appears unused below — confirm against upstream before removing.
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index), noreg);
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label
defines a relative address from Jcc+1 12668 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{ 12669 match(If cop cr); 12670 effect(USE labl); 12671 12672 ins_cost(300); 12673 format %{ "J$cop $labl" %} 12674 size(6); 12675 ins_encode %{ 12676 Label* L = $labl$$label; 12677 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12678 %} 12679 ins_pipe( pipe_jcc ); 12680 %} 12681 12682 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12683 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{ 12684 match(CountedLoopEnd cop cr); 12685 effect(USE labl); 12686 12687 ins_cost(300); 12688 format %{ "J$cop $labl\t# Loop end" %} 12689 size(6); 12690 ins_encode %{ 12691 Label* L = $labl$$label; 12692 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12693 %} 12694 ins_pipe( pipe_jcc ); 12695 %} 12696 12697 // Jump Direct Conditional - using unsigned comparison 12698 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12699 match(If cop cmp); 12700 effect(USE labl); 12701 12702 ins_cost(300); 12703 format %{ "J$cop,u $labl" %} 12704 size(6); 12705 ins_encode %{ 12706 Label* L = $labl$$label; 12707 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12708 %} 12709 ins_pipe(pipe_jcc); 12710 %} 12711 12712 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12713 match(If cop cmp); 12714 effect(USE labl); 12715 12716 ins_cost(200); 12717 format %{ "J$cop,u $labl" %} 12718 size(6); 12719 ins_encode %{ 12720 Label* L = $labl$$label; 12721 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12722 %} 12723 ins_pipe(pipe_jcc); 12724 %} 12725 12726 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 12727 match(If cop cmp); 12728 effect(USE labl); 12729 12730 ins_cost(200); 12731 format %{ $$template 12732 if ($cop$$cmpcode == Assembler::notEqual) { 12733 $$emit$$"JP,u $labl\n\t" 12734 $$emit$$"J$cop,u $labl" 
12735 } else { 12736 $$emit$$"JP,u done\n\t" 12737 $$emit$$"J$cop,u $labl\n\t" 12738 $$emit$$"done:" 12739 } 12740 %} 12741 ins_encode %{ 12742 Label* l = $labl$$label; 12743 if ($cop$$cmpcode == Assembler::notEqual) { 12744 __ jcc(Assembler::parity, *l, false); 12745 __ jcc(Assembler::notEqual, *l, false); 12746 } else if ($cop$$cmpcode == Assembler::equal) { 12747 Label done; 12748 __ jccb(Assembler::parity, done); 12749 __ jcc(Assembler::equal, *l, false); 12750 __ bind(done); 12751 } else { 12752 ShouldNotReachHere(); 12753 } 12754 %} 12755 ins_pipe(pipe_jcc); 12756 %} 12757 12758 // ============================================================================ 12759 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass 12760 // array for an instance of the superklass. Set a hidden internal cache on a 12761 // hit (cache is checked with exposed code in gen_subtype_check()). Return 12762 // NZ for a miss or zero for a hit. The encoding ALSO sets flags. 12763 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{ 12764 match(Set result (PartialSubtypeCheck sub super)); 12765 effect( KILL rcx, KILL cr ); 12766 12767 ins_cost(1100); // slightly larger than the next version 12768 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12769 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12770 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12771 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12772 "JNE,s miss\t\t# Missed: EDI not-zero\n\t" 12773 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t" 12774 "XOR $result,$result\t\t Hit: EDI zero\n\t" 12775 "miss:\t" %} 12776 12777 opcode(0x1); // Force a XOR of EDI 12778 ins_encode( enc_PartialSubtypeCheck() ); 12779 ins_pipe( pipe_slow ); 12780 %} 12781 12782 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, 
eDIRegP result, immP0 zero ) %{ 12783 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); 12784 effect( KILL rcx, KILL result ); 12785 12786 ins_cost(1000); 12787 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12788 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12789 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12790 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12791 "JNE,s miss\t\t# Missed: flags NZ\n\t" 12792 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t" 12793 "miss:\t" %} 12794 12795 opcode(0x0); // No need to XOR EDI 12796 ins_encode( enc_PartialSubtypeCheck() ); 12797 ins_pipe( pipe_slow ); 12798 %} 12799 12800 // ============================================================================ 12801 // Branch Instructions -- short offset versions 12802 // 12803 // These instructions are used to replace jumps of a long offset (the default 12804 // match) with jumps of a shorter offset. These instructions are all tagged 12805 // with the ins_short_branch attribute, which causes the ADLC to suppress the 12806 // match rules in general matching. Instead, the ADLC generates a conversion 12807 // method in the MachNode which can be used to do in-place replacement of the 12808 // long variant with the shorter variant. The compiler will determine if a 12809 // branch can be taken by the is_short_branch_offset() predicate in the machine 12810 // specific code section of the file. 
// Jump Direct - Label defines a relative address from JMP+1
// Short (2-byte, rel8) replacement for jmpDir, selected by the ADLC's
// short-branch conversion when the offset fits.
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short replacement for jmpCon.
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short replacement for jmpLoopEnd.
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
// Short replacement for jmpConU.
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short replacement for jmpConUCF (UCF flags operand).
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short replacement for jmpConUCF2: two short jumps (4 bytes total).
// NE also branches on parity; EQ branches only when parity is clear.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers.  Comparing such values efficiently
// is tricky.  The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test.  The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code.  The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf.  One fix is to not match both
// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.
// Produces -1, 0, or +1 in $dst; the signed compare of the high words is
// followed by an unsigned compare of the low words.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
12986 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{ 12987 match( Set flags (CmpL src zero )); 12988 ins_cost(100); 12989 format %{ "TEST $src.hi,$src.hi" %} 12990 opcode(0x85); 12991 ins_encode( OpcP, RegReg_Hi2( src, src ) ); 12992 ins_pipe( ialu_cr_reg_reg ); 12993 %} 12994 12995 // Manifest a CmpL result in the normal flags. Only good for LT or GE 12996 // compares. Can be used for LE or GT compares by reversing arguments. 12997 // NOT GOOD FOR EQ/NE tests. 12998 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{ 12999 match( Set flags (CmpL src1 src2 )); 13000 effect( TEMP tmp ); 13001 ins_cost(300); 13002 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" 13003 "MOV $tmp,$src1.hi\n\t" 13004 "SBB $tmp,$src2.hi\t! Compute flags for long compare" %} 13005 ins_encode( long_cmp_flags2( src1, src2, tmp ) ); 13006 ins_pipe( ialu_cr_reg_reg ); 13007 %} 13008 13009 // Long compares reg < zero/req OR reg >= zero/req. 13010 // Just a wrapper for a normal branch, plus the predicate test. 13011 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{ 13012 match(If cmp flags); 13013 effect(USE labl); 13014 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 13015 expand %{ 13016 jmpCon(cmp,flags,labl); // JLT or JGE... 13017 %} 13018 %} 13019 13020 //====== 13021 // Manifest a CmpUL result in the normal flags. Only good for LT or GE 13022 // compares. Can be used for LE or GT compares by reversing arguments. 13023 // NOT GOOD FOR EQ/NE tests. 
13024 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{ 13025 match(Set flags (CmpUL src zero)); 13026 ins_cost(100); 13027 format %{ "TEST $src.hi,$src.hi" %} 13028 opcode(0x85); 13029 ins_encode(OpcP, RegReg_Hi2(src, src)); 13030 ins_pipe(ialu_cr_reg_reg); 13031 %} 13032 13033 // Manifest a CmpUL result in the normal flags. Only good for LT or GE 13034 // compares. Can be used for LE or GT compares by reversing arguments. 13035 // NOT GOOD FOR EQ/NE tests. 13036 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{ 13037 match(Set flags (CmpUL src1 src2)); 13038 effect(TEMP tmp); 13039 ins_cost(300); 13040 format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t" 13041 "MOV $tmp,$src1.hi\n\t" 13042 "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %} 13043 ins_encode(long_cmp_flags2(src1, src2, tmp)); 13044 ins_pipe(ialu_cr_reg_reg); 13045 %} 13046 13047 // Unsigned long compares reg < zero/req OR reg >= zero/req. 13048 // Just a wrapper for a normal branch, plus the predicate test. 13049 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{ 13050 match(If cmp flags); 13051 effect(USE labl); 13052 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); 13053 expand %{ 13054 jmpCon(cmp, flags, labl); // JLT or JGE... 13055 %} 13056 %} 13057 13058 // Compare 2 longs and CMOVE longs. 
// CMOVE both halves of a long; the predicate restricts this to lt/ge tests,
// matching the flagsReg_long_LTGE operand.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source form of cmovLL_reg_LTGE.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags wrappers: same encoding, expanded to the signed-flags forms.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LTGE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// FIX: added parentheses around the (lt || ge) disjunction.  The original
// 'UseSSE<=1 && lt || ge' parsed as '(UseSSE<=1 && lt) || ge' because &&
// binds tighter than ||, so the UseSSE gate did not apply to the ge case.
// The sibling integer/pointer variants above already parenthesize this way.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// FIX: parenthesized (lt || ge) so the UseSSE>=2 gate applies to both tests
// (previously a ge test matched even with UseSSE<2).
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// FIX: parenthesized (lt || ge); see note on cmovDDPR_reg_LTGE.
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// FIX: parenthesized (lt || ge); see note on cmovDDPR_reg_LTGE.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// OR of the two halves into $tmp sets ZF iff the whole long is zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// Compare low halves; only if they are equal is the high-half compare needed.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
13258 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{ 13259 match(If cmp flags); 13260 effect(USE labl); 13261 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne); 13262 expand %{ 13263 jmpCon(cmp, flags, labl); // JEQ or JNE... 13264 %} 13265 %} 13266 13267 // Compare 2 longs and CMOVE longs. 13268 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{ 13269 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 13270 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13271 ins_cost(400); 13272 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13273 "CMOV$cmp $dst.hi,$src.hi" %} 13274 opcode(0x0F,0x40); 13275 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 13276 ins_pipe( pipe_cmov_reg_long ); 13277 %} 13278 13279 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{ 13280 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 13281 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13282 ins_cost(500); 13283 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13284 "CMOV$cmp $dst.hi,$src.hi" %} 13285 opcode(0x0F,0x40); 13286 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 13287 ins_pipe( pipe_cmov_reg_long ); 13288 %} 13289 13290 // Compare 2 longs and CMOVE ints. 
// Compare 2 longs (EQ/NE flags) and CMOVE an int register.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags wrappers, expanded to the signed-flags forms.
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_EQNE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_EQNE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_EQNE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// FIX: added parentheses around the (eq || ne) disjunction.  The original
// 'UseSSE<=1 && eq || ne' parsed as '(UseSSE<=1 && eq) || ne' because &&
// binds tighter than ||, so the UseSSE gate did not apply to the ne case.
// The sibling integer/pointer variants above already parenthesize this way.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// FIX: parenthesized (eq || ne) so the UseSSE>=2 gate applies to both tests.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// FIX: parenthesized (eq || ne); see note on cmovDDPR_reg_EQNE.
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// FIX: parenthesized (eq || ne); see note on cmovDDPR_reg_EQNE.
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
// Computes 0 - src (64-bit) in $tmp via CMP/SBB; the resulting flags are used
// with a commuted (LE/GT) test.
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned variant: CMoveL under an unsigned long LE/GT compare; the
// encoding is identical to the signed rule, so this just expands to it.
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LEGT(cmp, flags, dst, src);
  %}
%}

// Unsigned variant of cmovLL_mem_LEGT (CMoveL from memory).
instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);   // CMOVcc is 0F 40+cc /r
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// CMoveI with a memory source under a long LE/GT compare.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned variant of cmovII_reg_LEGT.
instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LEGT(cmp, flags, dst, src);
  %}
%}

// Unsigned variant of cmovII_mem_LEGT.
instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LEGT(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (FPU stack, UseSSE<=1).
// FIX: the BoolTest disjunction in the four UseSSE-guarded predicates below
// is now parenthesized. '&&' binds tighter than '||', so previously the
// UseSSE guard applied only to the 'le' case and the 'gt' case was
// unguarded (the supports_cmov() predicates above already parenthesize
// the same disjunction).
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
                           _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM, UseSSE>=2).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
                           _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (FPU stack, UseSSE==0).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
                           _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM, UseSSE>=1).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
                           _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // Per the format, EAX is loaded with (oop)-1 ahead of the dynamic call.
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that, unlike CallLeafDirect, does not clear/verify the FPU stack.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
  match(TailCall jump_target method_ptr);
  ins_cost(300);
  // Indirect jump; the method pointer is pinned to EBX by the operand class.
  format %{ "JMP    $jump_target \t# EBX holds method" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // Discards the return address (popped into EDX) before the indirect jump;
  // the exception oop is pinned to EAX by the operand class.
  format %{ "POP    EDX\t# pop return address into dummy\n\t"
            "JMP    $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP    rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast-lock with RTM (Restricted Transactional Memory) support; selected
// only when the compilation uses RTM. Kills box and all temps.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    // On x86-32 the current thread must be materialized explicitly.
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
                 _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM fast-lock; complement of the predicate above.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    // No RTM registers/counters: pass noreg/nullptr placeholders.
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, $thread$$Register, nullptr, nullptr, nullptr, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// MaskAll for vector lengths up to 32 elements.
// NOTE(review): the instruct is named LT32 but the predicate is <= 32 and the
// format string says "LE32" — consider renaming for consistency (callers of
// the name are ADLC-internal).
instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// MaskAll for vector lengths above 32 elements; needs a temp mask register.
instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// Same as above but with a 32-bit integer source.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Safepoint Instruction
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();
    // The guarantee checks the emitted opcode is the 2-byte TEST r/m32,r32
    // form (0x85), matching the size(2) declaration above.
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV    $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Active rule: a load that immediately follows a store of the same value to
// the same memory is folded into just the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.