//
// Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, and
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, and that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save and restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously EBX, ESI, and EDI were set as save-on-entry for Java code.
// SOE was then turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, ESI and EDI are SOE registers again.
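// Illustrative sketch only (not one of this port's registers): a hypothetical
// register FOO that is caller-saved in both the Java and C conventions, spills
// as an int, and has hardware encoding 9 would be declared roughly as
//   reg_def FOO(SOC, SOC, Op_RegI, 9, foo->as_VMReg());
// where "foo" stands in for the corresponding Register accessor.  The actual
// definitions for this port follow.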

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// OK, so here's the trick: FPR1 is really st(0), except in the midst
// of emission of assembly for a machnode.  During the emission the FPU stack
// is pushed, making FPR1 == st(1) temporarily.  However, at any safepoint
// the stack will not have this element, so FPR1 == st(0) from the
// oopMap viewpoint.  This same weirdness with numbering forces the
// instruction encoding to play games with the register
// encode to correct for this 0/1 issue.  See MachSpillCopyNode::implementation,
// where it does flt->flt moves, for an example.
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI,   ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H );


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg          ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg   ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI)
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and not EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr) & ((uintptr_t)(~0xF)));
  // Store the value to a 128-bit operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bit masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (C->max_vector_size() > 16) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all types of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}

static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// Emit a 32-bit value and construct a relocation entry from a relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
                    int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// Emit a 32-bit value and construct a relocation entry from a RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
                    int format) {
#ifdef ASSERT
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );                 // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32(cbuf, displace);
          }
        }
        else {  // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32(cbuf, displace);
          }
        }
      }
    }
  }
  else {    // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32(cbuf, displace);
        }
      }
    }
  }
}


void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
Poll Safepoint"); 654 st->cr(); st->print("\t"); 655 } 656 } 657 #endif 658 659 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 660 Compile *C = ra_->C; 661 662 if (C->max_vector_size() > 16) { 663 // Clear upper bits of YMM registers when current compiled code uses 664 // wide vectors to avoid AVX <-> SSE transition penalty during call. 665 MacroAssembler masm(&cbuf); 666 masm.vzeroupper(); 667 } 668 // If method set FPU control word, restore to standard control word 669 if (C->in_24_bit_fp_mode()) { 670 MacroAssembler masm(&cbuf); 671 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 672 } 673 674 int framesize = C->frame_size_in_bytes(); 675 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 676 // Remove two words for return addr and rbp, 677 framesize -= 2*wordSize; 678 679 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here 680 681 if (framesize >= 128) { 682 emit_opcode(cbuf, 0x81); // add SP, #framesize 683 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 684 emit_d32(cbuf, framesize); 685 } else if (framesize) { 686 emit_opcode(cbuf, 0x83); // add SP, #framesize 687 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 688 emit_d8(cbuf, framesize); 689 } 690 691 emit_opcode(cbuf, 0x58 | EBP_enc); 692 693 if (do_polling() && C->is_method_compilation()) { 694 cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0); 695 emit_opcode(cbuf,0x85); 696 emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX 697 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 698 } 699 } 700 701 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { 702 Compile *C = ra_->C; 703 // If method set FPU control word, restore to standard control word 704 int size = C->in_24_bit_fp_mode() ? 6 : 0; 705 if (C->max_vector_size() > 16) size += 3; // vzeroupper 706 if (do_polling() && C->is_method_compilation()) size += 6; 707 708 int framesize = C->frame_size_in_bytes(); 709 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 710 // Remove two words for return addr and rbp, 711 framesize -= 2*wordSize; 712 713 size++; // popl rbp, 714 715 if (framesize >= 128) { 716 size += 6; 717 } else { 718 size += framesize ? 
  }
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else {                                 // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers.  Extra opcode bits, limited syntax.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) { // Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.
  int sz = 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4;
}


static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4;
}

static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
"FSTP_D" : "FST_D "; 913 op = 0xDD; 914 } else { // 32-bit store 915 op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S "; 916 op = 0xD9; 917 assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" ); 918 } 919 920 return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st); 921 } 922 923 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. 924 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 925 int src_hi, int dst_hi, uint ireg, outputStream* st); 926 927 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 928 int stack_offset, int reg, uint ireg, outputStream* st); 929 930 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 931 int dst_offset, uint ireg, outputStream* st) { 932 int calc_size = 0; 933 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 934 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 935 switch (ireg) { 936 case Op_VecS: 937 calc_size = 3+src_offset_size + 3+dst_offset_size; 938 break; 939 case Op_VecD: 940 calc_size = 3+src_offset_size + 3+dst_offset_size; 941 src_offset += 4; 942 dst_offset += 4; 943 src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 944 dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 945 calc_size += 3+src_offset_size + 3+dst_offset_size; 946 break; 947 case Op_VecX: 948 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 949 break; 950 case Op_VecY: 951 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 952 break; 953 default: 954 ShouldNotReachHere(); 955 } 956 if (cbuf) { 957 MacroAssembler _masm(cbuf); 958 int offset = __ offset(); 959 switch (ireg) { 960 case Op_VecS: 961 __ pushl(Address(rsp, src_offset)); 962 __ popl (Address(rsp, dst_offset)); 963 break; 964 case Op_VecD: 965 __ pushl(Address(rsp, src_offset)); 966 __ popl (Address(rsp, dst_offset)); 967 __ pushl(Address(rsp, src_offset+4)); 968 __ popl (Address(rsp, dst_offset+4)); 969 break; 970 case Op_VecX: 971 __ movdqu(Address(rsp, -16), xmm0); 972 __ movdqu(xmm0, Address(rsp, src_offset)); 973 __ movdqu(Address(rsp, dst_offset), xmm0); 974 __ movdqu(xmm0, Address(rsp, -16)); 975 break; 976 case Op_VecY: 977 __ vmovdqu(Address(rsp, -32), xmm0); 978 __ vmovdqu(xmm0, Address(rsp, src_offset)); 979 __ vmovdqu(Address(rsp, dst_offset), xmm0); 980 __ vmovdqu(xmm0, Address(rsp, -32)); 981 break; 982 default: 983 ShouldNotReachHere(); 984 } 985 int size = __ offset() - offset; 986 assert(size == calc_size, "incorrect size calculattion"); 987 return size; 988 #ifndef PRODUCT 989 } else if (!do_size) { 990 switch (ireg) { 991 case Op_VecS: 992 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 993 "popl [rsp + #%d]", 994 src_offset, dst_offset); 995 break; 996 case Op_VecD: 997 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 998 "popq [rsp + #%d]\n\t" 999 "pushl [rsp + #%d]\n\t" 1000 "popq [rsp + #%d]", 1001 src_offset, dst_offset, src_offset+4, dst_offset+4); 1002 break; 1003 case Op_VecX: 1004 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1005 "movdqu xmm0, [rsp + #%d]\n\t" 1006 "movdqu [rsp + #%d], xmm0\n\t" 1007 "movdqu xmm0, [rsp - #16]", 1008 src_offset, dst_offset); 1009 break; 1010 case Op_VecY: 1011 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1012 "vmovdqu xmm0, [rsp + #%d]\n\t" 1013 "vmovdqu [rsp + #%d], xmm0\n\t" 1014 "vmovdqu xmm0, [rsp - 
#32]", 1015 src_offset, dst_offset); 1016 break; 1017 default: 1018 ShouldNotReachHere(); 1019 } 1020 #endif 1021 } 1022 return calc_size; 1023 } 1024 1025 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1026 // Get registers to move 1027 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1028 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1029 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1030 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1031 1032 enum RC src_second_rc = rc_class(src_second); 1033 enum RC src_first_rc = rc_class(src_first); 1034 enum RC dst_second_rc = rc_class(dst_second); 1035 enum RC dst_first_rc = rc_class(dst_first); 1036 1037 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1038 1039 // Generate spill code! 1040 int size = 0; 1041 1042 if( src_first == dst_first && src_second == dst_second ) 1043 return size; // Self copy, no move 1044 1045 if (bottom_type()->isa_vect() != NULL) { 1046 uint ireg = ideal_reg(); 1047 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1048 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1049 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity"); 1050 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1051 // mem -> mem 1052 int src_offset = ra_->reg2offset(src_first); 1053 int dst_offset = ra_->reg2offset(dst_first); 1054 return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); 1055 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1056 return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); 1057 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1058 int stack_offset = ra_->reg2offset(dst_first); 1059 return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); 1060 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1061 int stack_offset = ra_->reg2offset(src_first); 1062 return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); 1063 } else { 1064 ShouldNotReachHere(); 1065 } 1066 } 1067 1068 // -------------------------------------- 1069 // Check for mem-mem move. push/pop to move. 
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP  ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP  ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP  ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                         st->print("FST %s",Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                                                       // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif
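// (Rough byte accounting, inferred here rather than taken from the original
//  comments: CMP EAX,[ECX+#klass_offset] encodes in 3 bytes, the JNE to the
//  IC-miss stub uses the 6-byte Jcc rel32 form, and 2 or 3 NOPs pad the
//  unverified entry point, which is how MachUEPNode::size() below arrives at
//  11 or 12 bytes.)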

void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}

// This is UltraSparc specific; true just means we have fast l2f conversion.
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
  return false;
}

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Needs 2 CMOV's for longs.
const int Matcher::long_cmove_cost() { return 1; }

// No CMOVF/CMOVD with SSE/SSE2
const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers?  True for Intel but false for most RISCs.
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt     = 1;                 // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                          // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new (C) indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register; uses { EDX, EBX, EDI, ESI }
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to the stack during deoptimization?
// On x86_32 they are stored with conversion only when the FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1488 bool Matcher::can_be_java_arg( int reg ) { 1489 if( reg == ECX_num || reg == EDX_num ) return true; 1490 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; 1491 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; 1492 return false; 1493 } 1494 1495 bool Matcher::is_spillable_arg( int reg ) { 1496 return can_be_java_arg(reg); 1497 } 1498 1499 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { 1500 // Use hardware integer DIV instruction when 1501 // it is faster than a code which use multiply. 1502 // Only when constant divisor fits into 32 bit 1503 // (min_jint is excluded to get only correct 1504 // positive 32 bit values from negative). 1505 return VM_Version::has_fast_idiv() && 1506 (divisor == (int)divisor && divisor != min_jint); 1507 } 1508 1509 // Register for DIVI projection of divmodI 1510 RegMask Matcher::divI_proj_mask() { 1511 return EAX_REG_mask(); 1512 } 1513 1514 // Register for MODI projection of divmodI 1515 RegMask Matcher::modI_proj_mask() { 1516 return EDX_REG_mask(); 1517 } 1518 1519 // Register for DIVL projection of divmodL 1520 RegMask Matcher::divL_proj_mask() { 1521 ShouldNotReachHere(); 1522 return RegMask(); 1523 } 1524 1525 // Register for MODL projection of divmodL 1526 RegMask Matcher::modL_proj_mask() { 1527 ShouldNotReachHere(); 1528 return RegMask(); 1529 } 1530 1531 const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1532 return NO_REG_mask(); 1533 } 1534 1535 // Returns true if the high 32 bits of the value is known to be zero. 1536 bool is_operand_hi32_zero(Node* n) { 1537 int opc = n->Opcode(); 1538 if (opc == Op_AndL) { 1539 Node* o2 = n->in(2); 1540 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1541 return true; 1542 } 1543 } 1544 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1545 return true; 1546 } 1547 return false; 1548 } 1549 1550 %} 1551 1552 //----------ENCODING BLOCK----------------------------------------------------- 1553 // This block specifies the encoding classes used by the compiler to output 1554 // byte streams. Encoding classes generate functions which are called by 1555 // Machine Instruction Nodes in order to generate the bit encoding of the 1556 // instruction. Operands specify their base encoding interface with the 1557 // interface keyword. There are currently supported four interfaces, 1558 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an 1559 // operand to generate a function which returns its register number when 1560 // queried. CONST_INTER causes an operand to generate a function which 1561 // returns the value of the constant when queried. MEMORY_INTER causes an 1562 // operand to generate four functions which return the Base Register, the 1563 // Index Register, the Scale Value, and the Offset Value of the operand when 1564 // queried. COND_INTER causes an operand to generate six functions which 1565 // return the encoding code (ie - encoding bits for the instruction) 1566 // associated with each basic boolean condition for a conditional instruction. 1567 // Instructions specify two basic values for encoding. They use the 1568 // ins_encode keyword to specify their encoding class (which must be one of 1569 // the class names specified in the encoding block), and they use the 1570 // opcode keyword to specify, in order, their primary, secondary, and 1571 // tertiary opcode. Only the opcode sections which a particular instruction 1572 // needs for encoding need to be specified. 
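// Illustration (an assumed, simplified view -- not actual ADLC output): an
// operand declared with MEMORY_INTER effectively answers the four queries
// below, which the enc_class bodies in the encode block that follows read back
// as $mem$$base, $mem$$index, $mem$$scale and $mem$$disp and hand to
// encode_RegMem() to be folded into ModRM/SIB/displacement bytes.
//
//   struct MemoryInterView {     // hypothetical name, for illustration only
//     int base;                  // base register encoding, 0xFFFFFFFF = no base
//     int index;                 // index register encoding, 0x04 = no index
//     int scale;                 // 0..3; the multiplier is 1 << scale
//     int disp;                  // byte displacement
//   };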
1573 encode %{ 1574 // Build emit functions for each basic byte or larger field in the intel 1575 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1576 // code in the enc_class source block. Emit functions will live in the 1577 // main source block for now. In future, we can generalize this by 1578 // adding a syntax that specifies the sizes of fields in an order, 1579 // so that the adlc can build the emit functions automagically 1580 1581 // Emit primary opcode 1582 enc_class OpcP %{ 1583 emit_opcode(cbuf, $primary); 1584 %} 1585 1586 // Emit secondary opcode 1587 enc_class OpcS %{ 1588 emit_opcode(cbuf, $secondary); 1589 %} 1590 1591 // Emit opcode directly 1592 enc_class Opcode(immI d8) %{ 1593 emit_opcode(cbuf, $d8$$constant); 1594 %} 1595 1596 enc_class SizePrefix %{ 1597 emit_opcode(cbuf,0x66); 1598 %} 1599 1600 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 1601 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1602 %} 1603 1604 enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many) 1605 emit_opcode(cbuf,$opcode$$constant); 1606 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1607 %} 1608 1609 enc_class mov_r32_imm0( rRegI dst ) %{ 1610 emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32 1611 emit_d32 ( cbuf, 0x0 ); // imm32==0x0 1612 %} 1613 1614 enc_class cdq_enc %{ 1615 // Full implementation of Java idiv and irem; checks for 1616 // special case as described in JVM spec., p.243 & p.271. 1617 // 1618 // normal case special case 1619 // 1620 // input : rax,: dividend min_int 1621 // reg: divisor -1 1622 // 1623 // output: rax,: quotient (= rax, idiv reg) min_int 1624 // rdx: remainder (= rax, irem reg) 0 1625 // 1626 // Code sequnce: 1627 // 1628 // 81 F8 00 00 00 80 cmp rax,80000000h 1629 // 0F 85 0B 00 00 00 jne normal_case 1630 // 33 D2 xor rdx,edx 1631 // 83 F9 FF cmp rcx,0FFh 1632 // 0F 84 03 00 00 00 je done 1633 // normal_case: 1634 // 99 cdq 1635 // F7 F9 idiv rax,ecx 1636 // done: 1637 // 1638 emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8); 1639 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); 1640 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h 1641 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85); 1642 emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00); 1643 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case 1644 emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx 1645 emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh 1646 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84); 1647 emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00); 1648 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done 1649 // normal_case: 1650 emit_opcode(cbuf,0x99); // cdq 1651 // idiv (note: must be emitted by the user of this rule) 1652 // normal: 1653 %} 1654 1655 // Dense encoding for older common ops 1656 enc_class Opc_plus(immI opcode, rRegI reg) %{ 1657 emit_opcode(cbuf, $opcode$$constant + $reg$$reg); 1658 %} 1659 1660 1661 // Opcde enc_class for 8/32 bit immediate instructions with sign-extension 1662 enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit 1663 // Check for 8-bit immediate, and set sign extend bit in opcode 1664 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1665 emit_opcode(cbuf, $primary | 0x02); 1666 } 1667 else { // If 32-bit immediate 1668 emit_opcode(cbuf, $primary); 1669 } 1670 %} 1671 1672 enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m 1673 // Emit primary opcode and set sign-extend bit 1674 // Check for 8-bit immediate, and set sign 
extend bit in opcode 1675 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1676 emit_opcode(cbuf, $primary | 0x02); } 1677 else { // If 32-bit immediate 1678 emit_opcode(cbuf, $primary); 1679 } 1680 // Emit r/m byte with secondary opcode, after primary opcode. 1681 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1682 %} 1683 1684 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1685 // Check for 8-bit immediate, and set sign extend bit in opcode 1686 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1687 $$$emit8$imm$$constant; 1688 } 1689 else { // If 32-bit immediate 1690 // Output immediate 1691 $$$emit32$imm$$constant; 1692 } 1693 %} 1694 1695 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1696 // Emit primary opcode and set sign-extend bit 1697 // Check for 8-bit immediate, and set sign extend bit in opcode 1698 int con = (int)$imm$$constant; // Throw away top bits 1699 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1700 // Emit r/m byte with secondary opcode, after primary opcode. 1701 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1702 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1703 else emit_d32(cbuf,con); 1704 %} 1705 1706 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1707 // Emit primary opcode and set sign-extend bit 1708 // Check for 8-bit immediate, and set sign extend bit in opcode 1709 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1710 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1711 // Emit r/m byte with tertiary opcode, after primary opcode. 1712 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1713 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1714 else emit_d32(cbuf,con); 1715 %} 1716 1717 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1718 emit_cc(cbuf, $secondary, $dst$$reg ); 1719 %} 1720 1721 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1722 int destlo = $dst$$reg; 1723 int desthi = HIGH_FROM_LOW(destlo); 1724 // bswap lo 1725 emit_opcode(cbuf, 0x0F); 1726 emit_cc(cbuf, 0xC8, destlo); 1727 // bswap hi 1728 emit_opcode(cbuf, 0x0F); 1729 emit_cc(cbuf, 0xC8, desthi); 1730 // xchg lo and hi 1731 emit_opcode(cbuf, 0x87); 1732 emit_rm(cbuf, 0x3, destlo, desthi); 1733 %} 1734 1735 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 
1736 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1737 %} 1738 1739 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1740 $$$emit8$primary; 1741 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1742 %} 1743 1744 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1745 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1746 emit_d8(cbuf, op >> 8 ); 1747 emit_d8(cbuf, op & 255); 1748 %} 1749 1750 // emulate a CMOV with a conditional branch around a MOV 1751 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1752 // Invert sense of branch from sense of CMOV 1753 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1754 emit_d8( cbuf, $brOffs$$constant ); 1755 %} 1756 1757 enc_class enc_PartialSubtypeCheck( ) %{ 1758 Register Redi = as_Register(EDI_enc); // result register 1759 Register Reax = as_Register(EAX_enc); // super class 1760 Register Recx = as_Register(ECX_enc); // killed 1761 Register Resi = as_Register(ESI_enc); // sub class 1762 Label miss; 1763 1764 MacroAssembler _masm(&cbuf); 1765 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1766 NULL, &miss, 1767 /*set_cond_codes:*/ true); 1768 if ($primary) { 1769 __ xorptr(Redi, Redi); 1770 } 1771 __ bind(miss); 1772 %} 1773 1774 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1775 MacroAssembler masm(&cbuf); 1776 int start = masm.offset(); 1777 if (UseSSE >= 2) { 1778 if (VerifyFPU) { 1779 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1780 } 1781 } else { 1782 // External c_calling_convention expects the FPU stack to be 'clean'. 1783 // Compiled code leaves it dirty. Do cleanup now. 1784 masm.empty_FPU_stack(); 1785 } 1786 if (sizeof_FFree_Float_Stack_All == -1) { 1787 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1788 } else { 1789 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1790 } 1791 %} 1792 1793 enc_class Verify_FPU_For_Leaf %{ 1794 if( VerifyFPU ) { 1795 MacroAssembler masm(&cbuf); 1796 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1797 } 1798 %} 1799 1800 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1801 // This is the instruction starting address for relocation info. 1802 cbuf.set_insts_mark(); 1803 $$$emit8$primary; 1804 // CALL directly to the runtime 1805 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1806 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1807 1808 if (UseSSE >= 2) { 1809 MacroAssembler _masm(&cbuf); 1810 BasicType rt = tf()->return_type(); 1811 1812 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1813 // A C runtime call where the return value is unused. In SSE2+ 1814 // mode the result needs to be removed from the FPU stack. It's 1815 // likely that this function call could be removed by the 1816 // optimizer if the C function is a pure function. 
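        // (Background for the branches below: the 32-bit C ABI returns float
        // and double results in x87 ST(0), while UseSSE>=2 compiled code
        // expects them in XMM0. There is no direct x87->XMM move, so a used
        // result is bounced through the stack -- roughly, for a double:
        //     sub   esp, 8
        //     fstp  qword ptr [esp]    ; pop ST(0) into memory
        //     movsd xmm0, [esp]        ; reload into XMM0
        //     add   esp, 8
        // and the 4-byte movss variant for a float.)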
1817 __ ffree(0); 1818 } else if (rt == T_FLOAT) { 1819 __ lea(rsp, Address(rsp, -4)); 1820 __ fstp_s(Address(rsp, 0)); 1821 __ movflt(xmm0, Address(rsp, 0)); 1822 __ lea(rsp, Address(rsp, 4)); 1823 } else if (rt == T_DOUBLE) { 1824 __ lea(rsp, Address(rsp, -8)); 1825 __ fstp_d(Address(rsp, 0)); 1826 __ movdbl(xmm0, Address(rsp, 0)); 1827 __ lea(rsp, Address(rsp, 8)); 1828 } 1829 } 1830 %} 1831 1832 1833 enc_class pre_call_resets %{ 1834 // If method sets FPU control word restore it here 1835 debug_only(int off0 = cbuf.insts_size()); 1836 if (ra_->C->in_24_bit_fp_mode()) { 1837 MacroAssembler _masm(&cbuf); 1838 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1839 } 1840 if (ra_->C->max_vector_size() > 16) { 1841 // Clear upper bits of YMM registers when current compiled code uses 1842 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1843 MacroAssembler _masm(&cbuf); 1844 __ vzeroupper(); 1845 } 1846 debug_only(int off1 = cbuf.insts_size()); 1847 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1848 %} 1849 1850 enc_class post_call_FPU %{ 1851 // If method sets FPU control word do it here also 1852 if (Compile::current()->in_24_bit_fp_mode()) { 1853 MacroAssembler masm(&cbuf); 1854 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1855 } 1856 %} 1857 1858 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1859 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1860 // who we intended to call. 1861 cbuf.set_insts_mark(); 1862 $$$emit8$primary; 1863 if (!_method) { 1864 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1865 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1866 } else if (_optimized_virtual) { 1867 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1868 opt_virtual_call_Relocation::spec(), RELOC_IMM32 ); 1869 } else { 1870 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1871 static_call_Relocation::spec(), RELOC_IMM32 ); 1872 } 1873 if (_method) { // Emit stub for static call. 1874 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1875 if (stub == NULL) { 1876 ciEnv::current()->record_failure("CodeCache is full"); 1877 return; 1878 } 1879 } 1880 %} 1881 1882 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1883 MacroAssembler _masm(&cbuf); 1884 __ ic_call((address)$meth$$method); 1885 %} 1886 1887 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1888 int disp = in_bytes(Method::from_compiled_offset()); 1889 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1890 1891 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1892 cbuf.set_insts_mark(); 1893 $$$emit8$primary; 1894 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1895 emit_d8(cbuf, disp); // Displacement 1896 1897 %} 1898 1899 // Following encoding is no longer used, but may be restored if calling 1900 // convention changes significantly. 
1901 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1902 // 1903 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1904 // // int ic_reg = Matcher::inline_cache_reg(); 1905 // // int ic_encode = Matcher::_regEncode[ic_reg]; 1906 // // int imo_reg = Matcher::interpreter_method_oop_reg(); 1907 // // int imo_encode = Matcher::_regEncode[imo_reg]; 1908 // 1909 // // // Interpreter expects method_oop in EBX, currently a callee-saved register, 1910 // // // so we load it immediately before the call 1911 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop 1912 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1913 // 1914 // // xor rbp,ebp 1915 // emit_opcode(cbuf, 0x33); 1916 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1917 // 1918 // // CALL to interpreter. 1919 // cbuf.set_insts_mark(); 1920 // $$$emit8$primary; 1921 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1922 // runtime_call_Relocation::spec(), RELOC_IMM32 ); 1923 // %} 1924 1925 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1926 $$$emit8$primary; 1927 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1928 $$$emit8$shift$$constant; 1929 %} 1930 1931 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1932 // Load immediate does not have a zero or sign extended version 1933 // for 8-bit immediates 1934 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1935 $$$emit32$src$$constant; 1936 %} 1937 1938 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1939 // Load immediate does not have a zero or sign extended version 1940 // for 8-bit immediates 1941 emit_opcode(cbuf, $primary + $dst$$reg); 1942 $$$emit32$src$$constant; 1943 %} 1944 1945 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1946 // Load immediate does not have a zero or sign extended version 1947 // for 8-bit immediates 1948 int dst_enc = $dst$$reg; 1949 int src_con = $src$$constant & 0x0FFFFFFFFL; 1950 if (src_con == 0) { 1951 // xor dst, dst 1952 emit_opcode(cbuf, 0x33); 1953 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1954 } else { 1955 emit_opcode(cbuf, $primary + dst_enc); 1956 emit_d32(cbuf, src_con); 1957 } 1958 %} 1959 1960 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 1961 // Load immediate does not have a zero or sign extended version 1962 // for 8-bit immediates 1963 int dst_enc = $dst$$reg + 2; 1964 int src_con = ((julong)($src$$constant)) >> 32; 1965 if (src_con == 0) { 1966 // xor dst, dst 1967 emit_opcode(cbuf, 0x33); 1968 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1969 } else { 1970 emit_opcode(cbuf, $primary + dst_enc); 1971 emit_d32(cbuf, src_con); 1972 } 1973 %} 1974 1975 1976 // Encode a reg-reg copy. If it is useless, then empty encoding. 
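  // Presumed behaviour of the encode_Copy() helper used by the next two
  // encodings (a sketch under that assumption, not its definition): emit
  // nothing when the allocator gave dst and src the same register, otherwise
  // emit a plain MOV r32,r32 (0x8B /r) as the explicit moves in this block do.
  //
  //   static void sketch_copy(CodeBuffer &cbuf, int dst_enc, int src_enc) {
  //     if (dst_enc == src_enc) return;        // useless copy: empty encoding
  //     emit_opcode(cbuf, 0x8B);               // MOV dst,src
  //     emit_rm(cbuf, 0x3, dst_enc, src_enc);
  //   }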
1977 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 1978 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 1979 %} 1980 1981 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 1982 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 1983 %} 1984 1985 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 1986 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1987 %} 1988 1989 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 1990 $$$emit8$primary; 1991 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1992 %} 1993 1994 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 1995 $$$emit8$secondary; 1996 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 1997 %} 1998 1999 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 2000 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2001 %} 2002 2003 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 2004 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2005 %} 2006 2007 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 2008 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); 2009 %} 2010 2011 enc_class Con32 (immI src) %{ // Con32(storeImmI) 2012 // Output immediate 2013 $$$emit32$src$$constant; 2014 %} 2015 2016 enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm 2017 // Output Float immediate bits 2018 jfloat jf = $src$$constant; 2019 int jf_as_bits = jint_cast( jf ); 2020 emit_d32(cbuf, jf_as_bits); 2021 %} 2022 2023 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 2024 // Output Float immediate bits 2025 jfloat jf = $src$$constant; 2026 int jf_as_bits = jint_cast( jf ); 2027 emit_d32(cbuf, jf_as_bits); 2028 %} 2029 2030 enc_class Con16 (immI src) %{ // Con16(storeImmI) 2031 // Output immediate 2032 $$$emit16$src$$constant; 2033 %} 2034 2035 enc_class Con_d32(immI src) %{ 2036 emit_d32(cbuf,$src$$constant); 2037 %} 2038 2039 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 2040 // Output immediate memory reference 2041 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 2042 emit_d32(cbuf, 0x00); 2043 %} 2044 2045 enc_class lock_prefix( ) %{ 2046 if( os::is_MP() ) 2047 emit_opcode(cbuf,0xF0); // [Lock] 2048 %} 2049 2050 // Cmp-xchg long value. 2051 // Note: we need to swap rbx, and rcx before and after the 2052 // cmpxchg8 instruction because the instruction uses 2053 // rcx as the high order word of the new value to store but 2054 // our register encoding uses rbx,. 
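  // For reference, the architectural behaviour of LOCK CMPXCHG8B [mem] that the
  // encoding below relies on (a sketch of the semantics, not emitted code):
  //
  //   uint64_t expected = ((uint64_t)EDX << 32) | EAX;
  //   uint64_t desired  = ((uint64_t)ECX << 32) | EBX;
  //   if (*mem == expected) { *mem = desired; ZF = 1; }
  //   else { EDX = (uint32_t)(*mem >> 32); EAX = (uint32_t)*mem; ZF = 0; }
  //
  // Since the new value's high word must be in ECX but this port's pairing
  // keeps it where EBX sits, the XCHG below swaps EBX and ECX into place
  // before the instruction and back again afterwards.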
2055 enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{ 2056 2057 // XCHG rbx,ecx 2058 emit_opcode(cbuf,0x87); 2059 emit_opcode(cbuf,0xD9); 2060 // [Lock] 2061 if( os::is_MP() ) 2062 emit_opcode(cbuf,0xF0); 2063 // CMPXCHG8 [Eptr] 2064 emit_opcode(cbuf,0x0F); 2065 emit_opcode(cbuf,0xC7); 2066 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2067 // XCHG rbx,ecx 2068 emit_opcode(cbuf,0x87); 2069 emit_opcode(cbuf,0xD9); 2070 %} 2071 2072 enc_class enc_cmpxchg(eSIRegP mem_ptr) %{ 2073 // [Lock] 2074 if( os::is_MP() ) 2075 emit_opcode(cbuf,0xF0); 2076 2077 // CMPXCHG [Eptr] 2078 emit_opcode(cbuf,0x0F); 2079 emit_opcode(cbuf,0xB1); 2080 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2081 %} 2082 2083 enc_class enc_flags_ne_to_boolean( iRegI res ) %{ 2084 int res_encoding = $res$$reg; 2085 2086 // MOV res,0 2087 emit_opcode( cbuf, 0xB8 + res_encoding); 2088 emit_d32( cbuf, 0 ); 2089 // JNE,s fail 2090 emit_opcode(cbuf,0x75); 2091 emit_d8(cbuf, 5 ); 2092 // MOV res,1 2093 emit_opcode( cbuf, 0xB8 + res_encoding); 2094 emit_d32( cbuf, 1 ); 2095 // fail: 2096 %} 2097 2098 enc_class set_instruction_start( ) %{ 2099 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2100 %} 2101 2102 enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem 2103 int reg_encoding = $ereg$$reg; 2104 int base = $mem$$base; 2105 int index = $mem$$index; 2106 int scale = $mem$$scale; 2107 int displace = $mem$$disp; 2108 relocInfo::relocType disp_reloc = $mem->disp_reloc(); 2109 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2110 %} 2111 2112 enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem 2113 int reg_encoding = HIGH_FROM_LOW($ereg$$reg); // Hi register of pair, computed from lo 2114 int base = $mem$$base; 2115 int index = $mem$$index; 2116 int scale = $mem$$scale; 2117 int displace = $mem$$disp + 4; // Offset is 4 further in memory 2118 assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" ); 2119 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none); 2120 %} 2121 2122 enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{ 2123 int r1, r2; 2124 if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); } 2125 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); } 2126 emit_opcode(cbuf,0x0F); 2127 emit_opcode(cbuf,$tertiary); 2128 emit_rm(cbuf, 0x3, r1, r2); 2129 emit_d8(cbuf,$cnt$$constant); 2130 emit_d8(cbuf,$primary); 2131 emit_rm(cbuf, 0x3, $secondary, r1); 2132 emit_d8(cbuf,$cnt$$constant); 2133 %} 2134 2135 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ 2136 emit_opcode( cbuf, 0x8B ); // Move 2137 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 2138 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2139 emit_d8(cbuf,$primary); 2140 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 2141 emit_d8(cbuf,$cnt$$constant-32); 2142 } 2143 emit_d8(cbuf,$primary); 2144 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg)); 2145 emit_d8(cbuf,31); 2146 %} 2147 2148 enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{ 2149 int r1, r2; 2150 if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); } 2151 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); } 2152 2153 emit_opcode( cbuf, 0x8B ); // Move r1,r2 2154 emit_rm(cbuf, 0x3, r1, r2); 2155 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2156 emit_opcode(cbuf,$primary); 2157 emit_rm(cbuf, 0x3, $secondary, r1); 2158 emit_d8(cbuf,$cnt$$constant-32); 2159 } 2160 emit_opcode(cbuf,0x33); // XOR r2,r2 2161 
emit_rm(cbuf, 0x3, r2, r2); 2162 %} 2163 2164 // Clone of RegMem but accepts an extra parameter to access each 2165 // half of a double in memory; it never needs relocation info. 2166 enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{ 2167 emit_opcode(cbuf,$opcode$$constant); 2168 int reg_encoding = $rm_reg$$reg; 2169 int base = $mem$$base; 2170 int index = $mem$$index; 2171 int scale = $mem$$scale; 2172 int displace = $mem$$disp + $disp_for_half$$constant; 2173 relocInfo::relocType disp_reloc = relocInfo::none; 2174 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2175 %} 2176 2177 // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!! 2178 // 2179 // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant 2180 // and it never needs relocation information. 2181 // Frequently used to move data between FPU's Stack Top and memory. 2182 enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{ 2183 int rm_byte_opcode = $rm_opcode$$constant; 2184 int base = $mem$$base; 2185 int index = $mem$$index; 2186 int scale = $mem$$scale; 2187 int displace = $mem$$disp; 2188 assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" ); 2189 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none); 2190 %} 2191 2192 enc_class RMopc_Mem (immI rm_opcode, memory mem) %{ 2193 int rm_byte_opcode = $rm_opcode$$constant; 2194 int base = $mem$$base; 2195 int index = $mem$$index; 2196 int scale = $mem$$scale; 2197 int displace = $mem$$disp; 2198 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2199 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 2200 %} 2201 2202 enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea 2203 int reg_encoding = $dst$$reg; 2204 int base = $src0$$reg; // 0xFFFFFFFF indicates no base 2205 int index = 0x04; // 0x04 indicates no index 2206 int scale = 0x00; // 0x00 indicates no scale 2207 int displace = $src1$$constant; // 0x00 indicates no displacement 2208 relocInfo::relocType disp_reloc = relocInfo::none; 2209 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2210 %} 2211 2212 enc_class min_enc (rRegI dst, rRegI src) %{ // MIN 2213 // Compare dst,src 2214 emit_opcode(cbuf,0x3B); 2215 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2216 // jmp dst < src around move 2217 emit_opcode(cbuf,0x7C); 2218 emit_d8(cbuf,2); 2219 // move dst,src 2220 emit_opcode(cbuf,0x8B); 2221 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2222 %} 2223 2224 enc_class max_enc (rRegI dst, rRegI src) %{ // MAX 2225 // Compare dst,src 2226 emit_opcode(cbuf,0x3B); 2227 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2228 // jmp dst > src around move 2229 emit_opcode(cbuf,0x7F); 2230 emit_d8(cbuf,2); 2231 // move dst,src 2232 emit_opcode(cbuf,0x8B); 2233 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2234 %} 2235 2236 enc_class enc_FPR_store(memory mem, regDPR src) %{ 2237 // If src is FPR1, we can just FST to store it. 2238 // Else we need to FLD it to FPR1, then FSTP to store/pop it. 
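    // (Sketch of the two shapes this chooses between, in this file's usual
    //  emitted-form notation:
    //    src == FPR1:  FST  [mem]       ; store ST(0), FPU stack unchanged
    //    src != FPR1:  FLD  src         ; push a copy of src onto the stack
    //                  FSTP [mem]       ; store the copy and pop it again
    //  so the FPU stack depth is the same on exit either way.)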
2239 int reg_encoding = 0x2; // Just store 2240 int base = $mem$$base; 2241 int index = $mem$$index; 2242 int scale = $mem$$scale; 2243 int displace = $mem$$disp; 2244 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2245 if( $src$$reg != FPR1L_enc ) { 2246 reg_encoding = 0x3; // Store & pop 2247 emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it) 2248 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2249 } 2250 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2251 emit_opcode(cbuf,$primary); 2252 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2253 %} 2254 2255 enc_class neg_reg(rRegI dst) %{ 2256 // NEG $dst 2257 emit_opcode(cbuf,0xF7); 2258 emit_rm(cbuf, 0x3, 0x03, $dst$$reg ); 2259 %} 2260 2261 enc_class setLT_reg(eCXRegI dst) %{ 2262 // SETLT $dst 2263 emit_opcode(cbuf,0x0F); 2264 emit_opcode(cbuf,0x9C); 2265 emit_rm( cbuf, 0x3, 0x4, $dst$$reg ); 2266 %} 2267 2268 enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT 2269 int tmpReg = $tmp$$reg; 2270 2271 // SUB $p,$q 2272 emit_opcode(cbuf,0x2B); 2273 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg); 2274 // SBB $tmp,$tmp 2275 emit_opcode(cbuf,0x1B); 2276 emit_rm(cbuf, 0x3, tmpReg, tmpReg); 2277 // AND $tmp,$y 2278 emit_opcode(cbuf,0x23); 2279 emit_rm(cbuf, 0x3, tmpReg, $y$$reg); 2280 // ADD $p,$tmp 2281 emit_opcode(cbuf,0x03); 2282 emit_rm(cbuf, 0x3, $p$$reg, tmpReg); 2283 %} 2284 2285 enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{ 2286 // TEST shift,32 2287 emit_opcode(cbuf,0xF7); 2288 emit_rm(cbuf, 0x3, 0, ECX_enc); 2289 emit_d32(cbuf,0x20); 2290 // JEQ,s small 2291 emit_opcode(cbuf, 0x74); 2292 emit_d8(cbuf, 0x04); 2293 // MOV $dst.hi,$dst.lo 2294 emit_opcode( cbuf, 0x8B ); 2295 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2296 // CLR $dst.lo 2297 emit_opcode(cbuf, 0x33); 2298 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg); 2299 // small: 2300 // SHLD $dst.hi,$dst.lo,$shift 2301 emit_opcode(cbuf,0x0F); 2302 emit_opcode(cbuf,0xA5); 2303 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 2304 // SHL $dst.lo,$shift" 2305 emit_opcode(cbuf,0xD3); 2306 emit_rm(cbuf, 0x3, 0x4, $dst$$reg ); 2307 %} 2308 2309 enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{ 2310 // TEST shift,32 2311 emit_opcode(cbuf,0xF7); 2312 emit_rm(cbuf, 0x3, 0, ECX_enc); 2313 emit_d32(cbuf,0x20); 2314 // JEQ,s small 2315 emit_opcode(cbuf, 0x74); 2316 emit_d8(cbuf, 0x04); 2317 // MOV $dst.lo,$dst.hi 2318 emit_opcode( cbuf, 0x8B ); 2319 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 2320 // CLR $dst.hi 2321 emit_opcode(cbuf, 0x33); 2322 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg)); 2323 // small: 2324 // SHRD $dst.lo,$dst.hi,$shift 2325 emit_opcode(cbuf,0x0F); 2326 emit_opcode(cbuf,0xAD); 2327 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 2328 // SHR $dst.hi,$shift" 2329 emit_opcode(cbuf,0xD3); 2330 emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) ); 2331 %} 2332 2333 enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ 2334 // TEST shift,32 2335 emit_opcode(cbuf,0xF7); 2336 emit_rm(cbuf, 0x3, 0, ECX_enc); 2337 emit_d32(cbuf,0x20); 2338 // JEQ,s small 2339 emit_opcode(cbuf, 0x74); 2340 emit_d8(cbuf, 0x05); 2341 // MOV $dst.lo,$dst.hi 2342 emit_opcode( cbuf, 0x8B ); 2343 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 2344 // SAR $dst.hi,31 2345 emit_opcode(cbuf, 0xC1); 2346 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) ); 2347 emit_d8(cbuf, 0x1F ); 2348 // 
small: 2349 // SHRD $dst.lo,$dst.hi,$shift 2350 emit_opcode(cbuf,0x0F); 2351 emit_opcode(cbuf,0xAD); 2352 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 2353 // SAR $dst.hi,$shift" 2354 emit_opcode(cbuf,0xD3); 2355 emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) ); 2356 %} 2357 2358 2359 // ----------------- Encodings for floating point unit ----------------- 2360 // May leave result in FPU-TOS or FPU reg depending on opcodes 2361 enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV 2362 $$$emit8$primary; 2363 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); 2364 %} 2365 2366 // Pop argument in FPR0 with FSTP ST(0) 2367 enc_class PopFPU() %{ 2368 emit_opcode( cbuf, 0xDD ); 2369 emit_d8( cbuf, 0xD8 ); 2370 %} 2371 2372 // !!!!! equivalent to Pop_Reg_F 2373 enc_class Pop_Reg_DPR( regDPR dst ) %{ 2374 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2375 emit_d8( cbuf, 0xD8+$dst$$reg ); 2376 %} 2377 2378 enc_class Push_Reg_DPR( regDPR dst ) %{ 2379 emit_opcode( cbuf, 0xD9 ); 2380 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 2381 %} 2382 2383 enc_class strictfp_bias1( regDPR dst ) %{ 2384 emit_opcode( cbuf, 0xDB ); // FLD m80real 2385 emit_opcode( cbuf, 0x2D ); 2386 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() ); 2387 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2388 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2389 %} 2390 2391 enc_class strictfp_bias2( regDPR dst ) %{ 2392 emit_opcode( cbuf, 0xDB ); // FLD m80real 2393 emit_opcode( cbuf, 0x2D ); 2394 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() ); 2395 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2396 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2397 %} 2398 2399 // Special case for moving an integer register to a stack slot. 2400 enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2401 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp ); 2402 %} 2403 2404 // Special case for moving a register to a stack slot. 
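  // The RegSS encoding below addresses [ESP+disp32]. A ModRM r/m field of ESP
  // (100b) does not name a base register directly; it selects a SIB byte, and
  // a SIB with index=100b (none) and base=ESP yields the plain [ESP+disp32]
  // form. A minimal sketch of the byte math behind the two emit_rm() calls
  // below (hypothetical helper, for illustration only):
  //
  //   static void esp_disp32_modrm_sib(unsigned char out[2], int src_reg) {
  //     out[0] = (unsigned char)(0x80 | (src_reg << 3) | 0x04); // mod=10, reg=src, rm=ESP -> SIB follows
  //     out[1] = 0x24;                                          // scale=0, index=none, base=ESP
  //   }
  //   // ... followed by the 4-byte displacement, exactly as RegSS emits.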
2405 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2406 // Opcode already emitted 2407 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2408 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2409 emit_d32(cbuf, $dst$$disp); // Displacement 2410 %} 2411 2412 // Push the integer in stackSlot 'src' onto FP-stack 2413 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2414 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2415 %} 2416 2417 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2418 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2419 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2420 %} 2421 2422 // Same as Pop_Mem_F except for opcode 2423 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2424 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2425 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2426 %} 2427 2428 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2429 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2430 emit_d8( cbuf, 0xD8+$dst$$reg ); 2431 %} 2432 2433 enc_class Push_Reg_FPR( regFPR dst ) %{ 2434 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2435 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2436 %} 2437 2438 // Push FPU's float to a stack-slot, and pop FPU-stack 2439 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2440 int pop = 0x02; 2441 if ($src$$reg != FPR1L_enc) { 2442 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2443 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2444 pop = 0x03; 2445 } 2446 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2447 %} 2448 2449 // Push FPU's double to a stack-slot, and pop FPU-stack 2450 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2451 int pop = 0x02; 2452 if ($src$$reg != FPR1L_enc) { 2453 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2454 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2455 pop = 0x03; 2456 } 2457 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2458 %} 2459 2460 // Push FPU's double to a FPU-stack-slot, and pop FPU-stack 2461 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 2462 int pop = 0xD0 - 1; // -1 since we skip FLD 2463 if ($src$$reg != FPR1L_enc) { 2464 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 2465 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2466 pop = 0xD8; 2467 } 2468 emit_opcode( cbuf, 0xDD ); 2469 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) 2470 %} 2471 2472 2473 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 2474 // load dst in FPR0 2475 emit_opcode( cbuf, 0xD9 ); 2476 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2477 if ($src$$reg != FPR1L_enc) { 2478 // fincstp 2479 emit_opcode (cbuf, 0xD9); 2480 emit_opcode (cbuf, 0xF7); 2481 // swap src with FPR1: 2482 // FXCH FPR1 with src 2483 emit_opcode(cbuf, 0xD9); 2484 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2485 // fdecstp 2486 emit_opcode (cbuf, 0xD9); 2487 emit_opcode (cbuf, 0xF6); 2488 } 2489 %} 2490 2491 enc_class Push_ModD_encoding(regD src0, regD src1) %{ 2492 MacroAssembler _masm(&cbuf); 2493 __ subptr(rsp, 8); 2494 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2495 __ fld_d(Address(rsp, 0)); 2496 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2497 __ fld_d(Address(rsp, 0)); 2498 %} 2499 2500 enc_class Push_ModF_encoding(regF src0, regF src1) %{ 2501 MacroAssembler _masm(&cbuf); 2502 __ subptr(rsp, 4); 2503 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2504 __ fld_s(Address(rsp, 0)); 2505 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2506 __ fld_s(Address(rsp, 0)); 2507 %} 2508 2509 enc_class Push_ResultD(regD dst) %{ 2510 MacroAssembler 
_masm(&cbuf); 2511 __ fstp_d(Address(rsp, 0)); 2512 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2513 __ addptr(rsp, 8); 2514 %} 2515 2516 enc_class Push_ResultF(regF dst, immI d8) %{ 2517 MacroAssembler _masm(&cbuf); 2518 __ fstp_s(Address(rsp, 0)); 2519 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2520 __ addptr(rsp, $d8$$constant); 2521 %} 2522 2523 enc_class Push_SrcD(regD src) %{ 2524 MacroAssembler _masm(&cbuf); 2525 __ subptr(rsp, 8); 2526 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2527 __ fld_d(Address(rsp, 0)); 2528 %} 2529 2530 enc_class push_stack_temp_qword() %{ 2531 MacroAssembler _masm(&cbuf); 2532 __ subptr(rsp, 8); 2533 %} 2534 2535 enc_class pop_stack_temp_qword() %{ 2536 MacroAssembler _masm(&cbuf); 2537 __ addptr(rsp, 8); 2538 %} 2539 2540 enc_class push_xmm_to_fpr1(regD src) %{ 2541 MacroAssembler _masm(&cbuf); 2542 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2543 __ fld_d(Address(rsp, 0)); 2544 %} 2545 2546 enc_class Push_Result_Mod_DPR( regDPR src) %{ 2547 if ($src$$reg != FPR1L_enc) { 2548 // fincstp 2549 emit_opcode (cbuf, 0xD9); 2550 emit_opcode (cbuf, 0xF7); 2551 // FXCH FPR1 with src 2552 emit_opcode(cbuf, 0xD9); 2553 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2554 // fdecstp 2555 emit_opcode (cbuf, 0xD9); 2556 emit_opcode (cbuf, 0xF6); 2557 } 2558 // // following asm replaced with Pop_Reg_F or Pop_Mem_F 2559 // // FSTP FPR$dst$$reg 2560 // emit_opcode( cbuf, 0xDD ); 2561 // emit_d8( cbuf, 0xD8+$dst$$reg ); 2562 %} 2563 2564 enc_class fnstsw_sahf_skip_parity() %{ 2565 // fnstsw ax 2566 emit_opcode( cbuf, 0xDF ); 2567 emit_opcode( cbuf, 0xE0 ); 2568 // sahf 2569 emit_opcode( cbuf, 0x9E ); 2570 // jnp ::skip 2571 emit_opcode( cbuf, 0x7B ); 2572 emit_opcode( cbuf, 0x05 ); 2573 %} 2574 2575 enc_class emitModDPR() %{ 2576 // fprem must be iterative 2577 // :: loop 2578 // fprem 2579 emit_opcode( cbuf, 0xD9 ); 2580 emit_opcode( cbuf, 0xF8 ); 2581 // wait 2582 emit_opcode( cbuf, 0x9b ); 2583 // fnstsw ax 2584 emit_opcode( cbuf, 0xDF ); 2585 emit_opcode( cbuf, 0xE0 ); 2586 // sahf 2587 emit_opcode( cbuf, 0x9E ); 2588 // jp ::loop 2589 emit_opcode( cbuf, 0x0F ); 2590 emit_opcode( cbuf, 0x8A ); 2591 emit_opcode( cbuf, 0xF4 ); 2592 emit_opcode( cbuf, 0xFF ); 2593 emit_opcode( cbuf, 0xFF ); 2594 emit_opcode( cbuf, 0xFF ); 2595 %} 2596 2597 enc_class fpu_flags() %{ 2598 // fnstsw_ax 2599 emit_opcode( cbuf, 0xDF); 2600 emit_opcode( cbuf, 0xE0); 2601 // test ax,0x0400 2602 emit_opcode( cbuf, 0x66 ); // operand-size prefix for 16-bit immediate 2603 emit_opcode( cbuf, 0xA9 ); 2604 emit_d16 ( cbuf, 0x0400 ); 2605 // // // This sequence works, but stalls for 12-16 cycles on PPro 2606 // // test rax,0x0400 2607 // emit_opcode( cbuf, 0xA9 ); 2608 // emit_d32 ( cbuf, 0x00000400 ); 2609 // 2610 // jz exit (no unordered comparison) 2611 emit_opcode( cbuf, 0x74 ); 2612 emit_d8 ( cbuf, 0x02 ); 2613 // mov ah,1 - treat as LT case (set carry flag) 2614 emit_opcode( cbuf, 0xB4 ); 2615 emit_d8 ( cbuf, 0x01 ); 2616 // sahf 2617 emit_opcode( cbuf, 0x9E); 2618 %} 2619 2620 enc_class cmpF_P6_fixup() %{ 2621 // Fixup the integer flags in case comparison involved a NaN 2622 // 2623 // JNP exit (no unordered comparison, P-flag is set by NaN) 2624 emit_opcode( cbuf, 0x7B ); 2625 emit_d8 ( cbuf, 0x03 ); 2626 // MOV AH,1 - treat as LT case (set carry flag) 2627 emit_opcode( cbuf, 0xB4 ); 2628 emit_d8 ( cbuf, 0x01 ); 2629 // SAHF 2630 emit_opcode( cbuf, 0x9E); 2631 // NOP // target for branch to avoid branch to branch 2632 emit_opcode( cbuf, 0x90); 2633 %} 2634 2635 // fnstsw_ax(); 2636 // sahf(); 
2637 // movl(dst, nan_result); 2638 // jcc(Assembler::parity, exit); 2639 // movl(dst, less_result); 2640 // jcc(Assembler::below, exit); 2641 // movl(dst, equal_result); 2642 // jcc(Assembler::equal, exit); 2643 // movl(dst, greater_result); 2644 2645 // less_result = 1; 2646 // greater_result = -1; 2647 // equal_result = 0; 2648 // nan_result = -1; 2649 2650 enc_class CmpF_Result(rRegI dst) %{ 2651 // fnstsw_ax(); 2652 emit_opcode( cbuf, 0xDF); 2653 emit_opcode( cbuf, 0xE0); 2654 // sahf 2655 emit_opcode( cbuf, 0x9E); 2656 // movl(dst, nan_result); 2657 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2658 emit_d32( cbuf, -1 ); 2659 // jcc(Assembler::parity, exit); 2660 emit_opcode( cbuf, 0x7A ); 2661 emit_d8 ( cbuf, 0x13 ); 2662 // movl(dst, less_result); 2663 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2664 emit_d32( cbuf, -1 ); 2665 // jcc(Assembler::below, exit); 2666 emit_opcode( cbuf, 0x72 ); 2667 emit_d8 ( cbuf, 0x0C ); 2668 // movl(dst, equal_result); 2669 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2670 emit_d32( cbuf, 0 ); 2671 // jcc(Assembler::equal, exit); 2672 emit_opcode( cbuf, 0x74 ); 2673 emit_d8 ( cbuf, 0x05 ); 2674 // movl(dst, greater_result); 2675 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2676 emit_d32( cbuf, 1 ); 2677 %} 2678 2679 2680 // Compare the longs and set flags 2681 // BROKEN! Do Not use as-is 2682 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2683 // CMP $src1.hi,$src2.hi 2684 emit_opcode( cbuf, 0x3B ); 2685 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2686 // JNE,s done 2687 emit_opcode(cbuf,0x75); 2688 emit_d8(cbuf, 2 ); 2689 // CMP $src1.lo,$src2.lo 2690 emit_opcode( cbuf, 0x3B ); 2691 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2692 // done: 2693 %} 2694 2695 enc_class convert_int_long( regL dst, rRegI src ) %{ 2696 // mov $dst.lo,$src 2697 int dst_encoding = $dst$$reg; 2698 int src_encoding = $src$$reg; 2699 encode_Copy( cbuf, dst_encoding , src_encoding ); 2700 // mov $dst.hi,$src 2701 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2702 // sar $dst.hi,31 2703 emit_opcode( cbuf, 0xC1 ); 2704 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2705 emit_d8(cbuf, 0x1F ); 2706 %} 2707 2708 enc_class convert_long_double( eRegL src ) %{ 2709 // push $src.hi 2710 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2711 // push $src.lo 2712 emit_opcode(cbuf, 0x50+$src$$reg ); 2713 // fild 64-bits at [SP] 2714 emit_opcode(cbuf,0xdf); 2715 emit_d8(cbuf, 0x6C); 2716 emit_d8(cbuf, 0x24); 2717 emit_d8(cbuf, 0x00); 2718 // pop stack 2719 emit_opcode(cbuf, 0x83); // add SP, #8 2720 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2721 emit_d8(cbuf, 0x8); 2722 %} 2723 2724 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2725 // IMUL EDX:EAX,$src1 2726 emit_opcode( cbuf, 0xF7 ); 2727 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2728 // SAR EDX,$cnt-32 2729 int shift_count = ((int)$cnt$$constant) - 32; 2730 if (shift_count > 0) { 2731 emit_opcode(cbuf, 0xC1); 2732 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2733 emit_d8(cbuf, shift_count); 2734 } 2735 %} 2736 2737 // this version doesn't have add sp, 8 2738 enc_class convert_long_double2( eRegL src ) %{ 2739 // push $src.hi 2740 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2741 // push $src.lo 2742 emit_opcode(cbuf, 0x50+$src$$reg ); 2743 // fild 64-bits at [SP] 2744 emit_opcode(cbuf,0xdf); 2745 emit_d8(cbuf, 0x6C); 2746 emit_d8(cbuf, 0x24); 2747 emit_d8(cbuf, 0x00); 2748 %} 2749 2750 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 
2751 // Basic idea: long = (long)int * (long)int 2752 // IMUL EDX:EAX, src 2753 emit_opcode( cbuf, 0xF7 ); 2754 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2755 %} 2756 2757 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2758 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2759 // MUL EDX:EAX, src 2760 emit_opcode( cbuf, 0xF7 ); 2761 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2762 %} 2763 2764 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2765 // Basic idea: lo(result) = lo(x_lo * y_lo) 2766 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2767 // MOV $tmp,$src.lo 2768 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2769 // IMUL $tmp,EDX 2770 emit_opcode( cbuf, 0x0F ); 2771 emit_opcode( cbuf, 0xAF ); 2772 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2773 // MOV EDX,$src.hi 2774 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2775 // IMUL EDX,EAX 2776 emit_opcode( cbuf, 0x0F ); 2777 emit_opcode( cbuf, 0xAF ); 2778 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2779 // ADD $tmp,EDX 2780 emit_opcode( cbuf, 0x03 ); 2781 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2782 // MUL EDX:EAX,$src.lo 2783 emit_opcode( cbuf, 0xF7 ); 2784 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2785 // ADD EDX,ESI 2786 emit_opcode( cbuf, 0x03 ); 2787 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2788 %} 2789 2790 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2791 // Basic idea: lo(result) = lo(src * y_lo) 2792 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2793 // IMUL $tmp,EDX,$src 2794 emit_opcode( cbuf, 0x6B ); 2795 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2796 emit_d8( cbuf, (int)$src$$constant ); 2797 // MOV EDX,$src 2798 emit_opcode(cbuf, 0xB8 + EDX_enc); 2799 emit_d32( cbuf, (int)$src$$constant ); 2800 // MUL EDX:EAX,EDX 2801 emit_opcode( cbuf, 0xF7 ); 2802 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2803 // ADD EDX,ESI 2804 emit_opcode( cbuf, 0x03 ); 2805 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2806 %} 2807 2808 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2809 // PUSH src1.hi 2810 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2811 // PUSH src1.lo 2812 emit_opcode(cbuf, 0x50+$src1$$reg ); 2813 // PUSH src2.hi 2814 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2815 // PUSH src2.lo 2816 emit_opcode(cbuf, 0x50+$src2$$reg ); 2817 // CALL directly to the runtime 2818 cbuf.set_insts_mark(); 2819 emit_opcode(cbuf,0xE8); // Call into runtime 2820 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2821 // Restore stack 2822 emit_opcode(cbuf, 0x83); // add SP, #framesize 2823 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2824 emit_d8(cbuf, 4*4); 2825 %} 2826 2827 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2828 // PUSH src1.hi 2829 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2830 // PUSH src1.lo 2831 emit_opcode(cbuf, 0x50+$src1$$reg ); 2832 // PUSH src2.hi 2833 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2834 // PUSH src2.lo 2835 emit_opcode(cbuf, 0x50+$src2$$reg ); 2836 // CALL directly to the runtime 2837 cbuf.set_insts_mark(); 2838 emit_opcode(cbuf,0xE8); // Call into runtime 2839 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2840 // Restore stack 2841 emit_opcode(cbuf, 0x83); // add SP, #framesize 2842 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2843 
emit_d8(cbuf, 4*4); 2844 %} 2845 2846 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2847 // MOV $tmp,$src.lo 2848 emit_opcode(cbuf, 0x8B); 2849 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2850 // OR $tmp,$src.hi 2851 emit_opcode(cbuf, 0x0B); 2852 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2853 %} 2854 2855 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2856 // CMP $src1.lo,$src2.lo 2857 emit_opcode( cbuf, 0x3B ); 2858 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2859 // JNE,s skip 2860 emit_cc(cbuf, 0x70, 0x5); 2861 emit_d8(cbuf,2); 2862 // CMP $src1.hi,$src2.hi 2863 emit_opcode( cbuf, 0x3B ); 2864 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2865 %} 2866 2867 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2868 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2869 emit_opcode( cbuf, 0x3B ); 2870 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2871 // MOV $tmp,$src1.hi 2872 emit_opcode( cbuf, 0x8B ); 2873 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2874 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2875 emit_opcode( cbuf, 0x1B ); 2876 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2877 %} 2878 2879 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2880 // XOR $tmp,$tmp 2881 emit_opcode(cbuf,0x33); // XOR 2882 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2883 // CMP $tmp,$src.lo 2884 emit_opcode( cbuf, 0x3B ); 2885 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2886 // SBB $tmp,$src.hi 2887 emit_opcode( cbuf, 0x1B ); 2888 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2889 %} 2890 2891 // Sniff, sniff... smells like Gnu Superoptimizer 2892 enc_class neg_long( eRegL dst ) %{ 2893 emit_opcode(cbuf,0xF7); // NEG hi 2894 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2895 emit_opcode(cbuf,0xF7); // NEG lo 2896 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2897 emit_opcode(cbuf,0x83); // SBB hi,0 2898 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2899 emit_d8 (cbuf,0 ); 2900 %} 2901 2902 enc_class enc_pop_rdx() %{ 2903 emit_opcode(cbuf,0x5A); 2904 %} 2905 2906 enc_class enc_rethrow() %{ 2907 cbuf.set_insts_mark(); 2908 emit_opcode(cbuf, 0xE9); // jmp entry 2909 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2910 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2911 %} 2912 2913 2914 // Convert a double to an int. Java semantics require we do complex 2915 // manglelations in the corner cases. So we set the rounding mode to 2916 // 'zero', store the darned double down as an int, and reset the 2917 // rounding mode to 'nearest'. The hardware throws an exception which 2918 // patches up the correct value directly to the stack. 2919 enc_class DPR2I_encoding( regDPR src ) %{ 2920 // Flip to round-to-zero mode. We attempted to allow invalid-op 2921 // exceptions here, so that a NAN or other corner-case value will 2922 // thrown an exception (but normal values get converted at full speed). 2923 // However, I2C adapters and other float-stack manglers leave pending 2924 // invalid-op exceptions hanging. We would have to clear them before 2925 // enabling them and that is more expensive than just testing for the 2926 // invalid value Intel stores down in the corner cases. 
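    // For reference, the Java semantics (JLS 5.1.3) the slow path has to
    // produce whenever the truncating FIST stores the 0x80000000 "integer
    // indefinite" pattern -- a minimal sketch, assuming <limits.h>:
    //
    //   static int java_d2i(double d) {
    //     if (d != d)               return 0;        // NaN -> 0
    //     if (d <= (double)INT_MIN) return INT_MIN;  // clamp below
    //     if (d >= (double)INT_MAX) return INT_MAX;  // clamp above
    //     return (int)d;                             // otherwise truncate toward zero
    //   }
    //
    // NaN and the out-of-range cases all make FIST produce 0x80000000, which
    // is why the single CMP EAX,0x80000000 below is enough to route them
    // (plus the one legitimate 0x80000000 result) to StubRoutines::d2i_wrapper().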
2927 emit_opcode(cbuf,0xD9); // FLDCW trunc 2928 emit_opcode(cbuf,0x2D); 2929 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 2930 // Allocate a word 2931 emit_opcode(cbuf,0x83); // SUB ESP,4 2932 emit_opcode(cbuf,0xEC); 2933 emit_d8(cbuf,0x04); 2934 // Encoding assumes a double has been pushed into FPR0. 2935 // Store down the double as an int, popping the FPU stack 2936 emit_opcode(cbuf,0xDB); // FISTP [ESP] 2937 emit_opcode(cbuf,0x1C); 2938 emit_d8(cbuf,0x24); 2939 // Restore the rounding mode; mask the exception 2940 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2941 emit_opcode(cbuf,0x2D); 2942 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2943 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 2944 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 2945 2946 // Load the converted int; adjust CPU stack 2947 emit_opcode(cbuf,0x58); // POP EAX 2948 emit_opcode(cbuf,0x3D); // CMP EAX,imm 2949 emit_d32 (cbuf,0x80000000); // 0x80000000 2950 emit_opcode(cbuf,0x75); // JNE around_slow_call 2951 emit_d8 (cbuf,0x07); // Size of slow_call 2952 // Push src onto stack slow-path 2953 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2954 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2955 // CALL directly to the runtime 2956 cbuf.set_insts_mark(); 2957 emit_opcode(cbuf,0xE8); // Call into runtime 2958 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2959 // Carry on here... 2960 %} 2961 2962 enc_class DPR2L_encoding( regDPR src ) %{ 2963 emit_opcode(cbuf,0xD9); // FLDCW trunc 2964 emit_opcode(cbuf,0x2D); 2965 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 2966 // Allocate a word 2967 emit_opcode(cbuf,0x83); // SUB ESP,8 2968 emit_opcode(cbuf,0xEC); 2969 emit_d8(cbuf,0x08); 2970 // Encoding assumes a double has been pushed into FPR0. 2971 // Store down the double as a long, popping the FPU stack 2972 emit_opcode(cbuf,0xDF); // FISTP [ESP] 2973 emit_opcode(cbuf,0x3C); 2974 emit_d8(cbuf,0x24); 2975 // Restore the rounding mode; mask the exception 2976 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2977 emit_opcode(cbuf,0x2D); 2978 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2979 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 2980 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 2981 2982 // Load the converted int; adjust CPU stack 2983 emit_opcode(cbuf,0x58); // POP EAX 2984 emit_opcode(cbuf,0x5A); // POP EDX 2985 emit_opcode(cbuf,0x81); // CMP EDX,imm 2986 emit_d8 (cbuf,0xFA); // rdx 2987 emit_d32 (cbuf,0x80000000); // 0x80000000 2988 emit_opcode(cbuf,0x75); // JNE around_slow_call 2989 emit_d8 (cbuf,0x07+4); // Size of slow_call 2990 emit_opcode(cbuf,0x85); // TEST EAX,EAX 2991 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 2992 emit_opcode(cbuf,0x75); // JNE around_slow_call 2993 emit_d8 (cbuf,0x07); // Size of slow_call 2994 // Push src onto stack slow-path 2995 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2996 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2997 // CALL directly to the runtime 2998 cbuf.set_insts_mark(); 2999 emit_opcode(cbuf,0xE8); // Call into runtime 3000 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3001 // Carry on here... 
3002 %} 3003 3004 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 3005 // Operand was loaded from memory into fp ST (stack top) 3006 // FMUL ST,$src /* D8 C8+i */ 3007 emit_opcode(cbuf, 0xD8); 3008 emit_opcode(cbuf, 0xC8 + $src1$$reg); 3009 %} 3010 3011 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 3012 // FADDP ST,src2 /* D8 C0+i */ 3013 emit_opcode(cbuf, 0xD8); 3014 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3015 //could use FADDP src2,fpST /* DE C0+i */ 3016 %} 3017 3018 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 3019 // FADDP src2,ST /* DE C0+i */ 3020 emit_opcode(cbuf, 0xDE); 3021 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3022 %} 3023 3024 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 3025 // Operand has been loaded into fp ST (stack top) 3026 // FSUB ST,$src1 3027 emit_opcode(cbuf, 0xD8); 3028 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3029 3030 // FDIV 3031 emit_opcode(cbuf, 0xD8); 3032 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3033 %} 3034 3035 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3036 // Operand was loaded from memory into fp ST (stack top) 3037 // FADD ST,$src /* D8 C0+i */ 3038 emit_opcode(cbuf, 0xD8); 3039 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3040 3041 // FMUL ST,src2 /* D8 C*+i */ 3042 emit_opcode(cbuf, 0xD8); 3043 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3044 %} 3045 3046 3047 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3048 // Operand was loaded from memory into fp ST (stack top) 3049 // FADD ST,$src /* D8 C0+i */ 3050 emit_opcode(cbuf, 0xD8); 3051 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3052 3053 // FMULP src2,ST /* DE C8+i */ 3054 emit_opcode(cbuf, 0xDE); 3055 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3056 %} 3057 3058 // Atomically load the volatile long 3059 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3060 emit_opcode(cbuf,0xDF); 3061 int rm_byte_opcode = 0x05; 3062 int base = $mem$$base; 3063 int index = $mem$$index; 3064 int scale = $mem$$scale; 3065 int displace = $mem$$disp; 3066 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3067 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3068 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3069 %} 3070 3071 // Volatile Store Long. Must be atomic, so move it into 3072 // the FP TOS and then do a 64-bit FIST. Has to probe the 3073 // target address before the store (for null-ptr checks) 3074 // so the memory operand is used twice in the encoding. 3075 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3076 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3077 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3078 emit_opcode(cbuf,0xDF); 3079 int rm_byte_opcode = 0x07; 3080 int base = $mem$$base; 3081 int index = $mem$$index; 3082 int scale = $mem$$scale; 3083 int displace = $mem$$disp; 3084 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3085 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3086 %} 3087 3088 // Safepoint Poll. This polls the safepoint page, and causes an 3089 // exception if it is not readable. 
Unfortunately, it kills the condition code 3090 // in the process 3091 // We current use TESTL [spp],EDI 3092 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0 3093 3094 enc_class Safepoint_Poll() %{ 3095 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); 3096 emit_opcode(cbuf,0x85); 3097 emit_rm (cbuf, 0x0, 0x7, 0x5); 3098 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 3099 %} 3100 %} 3101 3102 3103 //----------FRAME-------------------------------------------------------------- 3104 // Definition of frame structure and management information. 3105 // 3106 // S T A C K L A Y O U T Allocators stack-slot number 3107 // | (to get allocators register number 3108 // G Owned by | | v add OptoReg::stack0()) 3109 // r CALLER | | 3110 // o | +--------+ pad to even-align allocators stack-slot 3111 // w V | pad0 | numbers; owned by CALLER 3112 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned 3113 // h ^ | in | 5 3114 // | | args | 4 Holes in incoming args owned by SELF 3115 // | | | | 3 3116 // | | +--------+ 3117 // V | | old out| Empty on Intel, window on Sparc 3118 // | old |preserve| Must be even aligned. 3119 // | SP-+--------+----> Matcher::_old_SP, even aligned 3120 // | | in | 3 area for Intel ret address 3121 // Owned by |preserve| Empty on Sparc. 3122 // SELF +--------+ 3123 // | | pad2 | 2 pad to align old SP 3124 // | +--------+ 1 3125 // | | locks | 0 3126 // | +--------+----> OptoReg::stack0(), even aligned 3127 // | | pad1 | 11 pad to align new SP 3128 // | +--------+ 3129 // | | | 10 3130 // | | spills | 9 spills 3131 // V | | 8 (pad0 slot for callee) 3132 // -----------+--------+----> Matcher::_out_arg_limit, unaligned 3133 // ^ | out | 7 3134 // | | args | 6 Holes in outgoing args owned by CALLEE 3135 // Owned by +--------+ 3136 // CALLEE | new out| 6 Empty on Intel, window on Sparc 3137 // | new |preserve| Must be even-aligned. 3138 // | SP-+--------+----> Matcher::_new_SP, even aligned 3139 // | | | 3140 // 3141 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is 3142 // known from SELF's arguments and the Java calling convention. 3143 // Region 6-7 is determined per call site. 3144 // Note 2: If the calling convention leaves holes in the incoming argument 3145 // area, those holes are owned by SELF. Holes in the outgoing area 3146 // are owned by the CALLEE. Holes should not be nessecary in the 3147 // incoming area, as the Java calling convention is completely under 3148 // the control of the AD file. Doubles can be sorted and packed to 3149 // avoid holes. Holes in the outgoing arguments may be nessecary for 3150 // varargs C calling conventions. 3151 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is 3152 // even aligned with pad0 as needed. 3153 // Region 6 is even aligned. Region 6-7 is NOT even aligned; 3154 // region 6-11 is even aligned; it may be padded out more so that 3155 // the region from SP to FP meets the minimum stack alignment. 3156 3157 frame %{ 3158 // What direction does stack grow in (assumed to be same for C & Java) 3159 stack_direction(TOWARDS_LOW); 3160 3161 // These three registers define part of the calling convention 3162 // between compiled code and the interpreter. 
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for the
  // return address and one for rbp (rbp must be saved).
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word.
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between incoming/outgoing, so just pass false.
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
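  // (Editorial sketch, not part of the original description: the 32-bit C ABI
  //  targeted here is cdecl-like, so the c_calling_convention body below is
  //  expected to place every outgoing argument in stack slots -- e.g. a native
  //  call taking (jint, jlong) would consume one 32-bit slot for the int and
  //  two adjacent slots for the long, with nothing passed in registers.)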
3222 c_calling_convention %{ 3223 // This is obviously always outgoing 3224 (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); 3225 %} 3226 3227 // Location of C & interpreter return values 3228 c_return_value %{ 3229 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3230 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3231 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3232 3233 // in SSE2+ mode we want to keep the FPU stack clean so pretend 3234 // that C functions return float and double results in XMM0. 3235 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3236 return OptoRegPair(XMM0b_num,XMM0_num); 3237 if( ideal_reg == Op_RegF && UseSSE>=2 ) 3238 return OptoRegPair(OptoReg::Bad,XMM0_num); 3239 3240 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3241 %} 3242 3243 // Location of return values 3244 return_value %{ 3245 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3246 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3247 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3248 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3249 return OptoRegPair(XMM0b_num,XMM0_num); 3250 if( ideal_reg == Op_RegF && UseSSE>=1 ) 3251 return OptoRegPair(OptoReg::Bad,XMM0_num); 3252 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3253 %} 3254 3255 %} 3256 3257 //----------ATTRIBUTES--------------------------------------------------------- 3258 //----------Operand Attributes------------------------------------------------- 3259 op_attrib op_cost(0); // Required cost attribute 3260 3261 //----------Instruction Attributes--------------------------------------------- 3262 ins_attrib ins_cost(100); // Required cost attribute 3263 ins_attrib ins_size(8); // Required size attribute (in bits) 3264 ins_attrib ins_short_branch(0); // Required flag: is this instruction a 3265 // non-matching short branch variant of some 3266 // long branch? 3267 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2) 3268 // specifies the alignment that some part of the instruction (not 3269 // necessarily the start) requires. If > 1, a compute_padding() 3270 // function must be provided for the instruction 3271 3272 //----------OPERANDS----------------------------------------------------------- 3273 // Operand definitions must precede instruction definitions for correct parsing 3274 // in the ADLC because operands constitute user defined types which are used in 3275 // instruction definitions. 
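// (Illustrative sketch only; the instruct below is hypothetical and not part of
//  this file.  An operand such as immI8, defined further down, is consumed by an
//  instruct exactly like a register operand, which lets the matcher select a
//  short sign-extended imm8 encoding whenever the constant fits in 8 bits:
//
//    instruct addI_eReg_imm8(rRegI dst, immI8 src, eFlagsReg cr) %{
//      match(Set dst (AddI dst src));
//      effect(KILL cr);
//      format %{ "ADD    $dst,$src" %}
//      ins_encode %{ __ addl($dst$$Register, $src$$constant); %}
//      ins_pipe( ialu_reg );
//    %}
// )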
3276 3277 //----------Simple Operands---------------------------------------------------- 3278 // Immediate Operands 3279 // Integer Immediate 3280 operand immI() %{ 3281 match(ConI); 3282 3283 op_cost(10); 3284 format %{ %} 3285 interface(CONST_INTER); 3286 %} 3287 3288 // Constant for test vs zero 3289 operand immI0() %{ 3290 predicate(n->get_int() == 0); 3291 match(ConI); 3292 3293 op_cost(0); 3294 format %{ %} 3295 interface(CONST_INTER); 3296 %} 3297 3298 // Constant for increment 3299 operand immI1() %{ 3300 predicate(n->get_int() == 1); 3301 match(ConI); 3302 3303 op_cost(0); 3304 format %{ %} 3305 interface(CONST_INTER); 3306 %} 3307 3308 // Constant for decrement 3309 operand immI_M1() %{ 3310 predicate(n->get_int() == -1); 3311 match(ConI); 3312 3313 op_cost(0); 3314 format %{ %} 3315 interface(CONST_INTER); 3316 %} 3317 3318 // Valid scale values for addressing modes 3319 operand immI2() %{ 3320 predicate(0 <= n->get_int() && (n->get_int() <= 3)); 3321 match(ConI); 3322 3323 format %{ %} 3324 interface(CONST_INTER); 3325 %} 3326 3327 operand immI8() %{ 3328 predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); 3329 match(ConI); 3330 3331 op_cost(5); 3332 format %{ %} 3333 interface(CONST_INTER); 3334 %} 3335 3336 operand immI16() %{ 3337 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); 3338 match(ConI); 3339 3340 op_cost(10); 3341 format %{ %} 3342 interface(CONST_INTER); 3343 %} 3344 3345 // Int Immediate non-negative 3346 operand immU31() 3347 %{ 3348 predicate(n->get_int() >= 0); 3349 match(ConI); 3350 3351 op_cost(0); 3352 format %{ %} 3353 interface(CONST_INTER); 3354 %} 3355 3356 // Constant for long shifts 3357 operand immI_32() %{ 3358 predicate( n->get_int() == 32 ); 3359 match(ConI); 3360 3361 op_cost(0); 3362 format %{ %} 3363 interface(CONST_INTER); 3364 %} 3365 3366 operand immI_1_31() %{ 3367 predicate( n->get_int() >= 1 && n->get_int() <= 31 ); 3368 match(ConI); 3369 3370 op_cost(0); 3371 format %{ %} 3372 interface(CONST_INTER); 3373 %} 3374 3375 operand immI_32_63() %{ 3376 predicate( n->get_int() >= 32 && n->get_int() <= 63 ); 3377 match(ConI); 3378 op_cost(0); 3379 3380 format %{ %} 3381 interface(CONST_INTER); 3382 %} 3383 3384 operand immI_1() %{ 3385 predicate( n->get_int() == 1 ); 3386 match(ConI); 3387 3388 op_cost(0); 3389 format %{ %} 3390 interface(CONST_INTER); 3391 %} 3392 3393 operand immI_2() %{ 3394 predicate( n->get_int() == 2 ); 3395 match(ConI); 3396 3397 op_cost(0); 3398 format %{ %} 3399 interface(CONST_INTER); 3400 %} 3401 3402 operand immI_3() %{ 3403 predicate( n->get_int() == 3 ); 3404 match(ConI); 3405 3406 op_cost(0); 3407 format %{ %} 3408 interface(CONST_INTER); 3409 %} 3410 3411 // Pointer Immediate 3412 operand immP() %{ 3413 match(ConP); 3414 3415 op_cost(10); 3416 format %{ %} 3417 interface(CONST_INTER); 3418 %} 3419 3420 // NULL Pointer Immediate 3421 operand immP0() %{ 3422 predicate( n->get_ptr() == 0 ); 3423 match(ConP); 3424 op_cost(0); 3425 3426 format %{ %} 3427 interface(CONST_INTER); 3428 %} 3429 3430 // Long Immediate 3431 operand immL() %{ 3432 match(ConL); 3433 3434 op_cost(20); 3435 format %{ %} 3436 interface(CONST_INTER); 3437 %} 3438 3439 // Long Immediate zero 3440 operand immL0() %{ 3441 predicate( n->get_long() == 0L ); 3442 match(ConL); 3443 op_cost(0); 3444 3445 format %{ %} 3446 interface(CONST_INTER); 3447 %} 3448 3449 // Long Immediate zero 3450 operand immL_M1() %{ 3451 predicate( n->get_long() == -1L ); 3452 match(ConL); 3453 op_cost(0); 3454 3455 format %{ %} 3456 
interface(CONST_INTER); 3457 %} 3458 3459 // Long immediate from 0 to 127. 3460 // Used for a shorter form of long mul by 10. 3461 operand immL_127() %{ 3462 predicate((0 <= n->get_long()) && (n->get_long() <= 127)); 3463 match(ConL); 3464 op_cost(0); 3465 3466 format %{ %} 3467 interface(CONST_INTER); 3468 %} 3469 3470 // Long Immediate: low 32-bit mask 3471 operand immL_32bits() %{ 3472 predicate(n->get_long() == 0xFFFFFFFFL); 3473 match(ConL); 3474 op_cost(0); 3475 3476 format %{ %} 3477 interface(CONST_INTER); 3478 %} 3479 3480 // Long Immediate: low 32-bit mask 3481 operand immL32() %{ 3482 predicate(n->get_long() == (int)(n->get_long())); 3483 match(ConL); 3484 op_cost(20); 3485 3486 format %{ %} 3487 interface(CONST_INTER); 3488 %} 3489 3490 //Double Immediate zero 3491 operand immDPR0() %{ 3492 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3493 // bug that generates code such that NaNs compare equal to 0.0 3494 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); 3495 match(ConD); 3496 3497 op_cost(5); 3498 format %{ %} 3499 interface(CONST_INTER); 3500 %} 3501 3502 // Double Immediate one 3503 operand immDPR1() %{ 3504 predicate( UseSSE<=1 && n->getd() == 1.0 ); 3505 match(ConD); 3506 3507 op_cost(5); 3508 format %{ %} 3509 interface(CONST_INTER); 3510 %} 3511 3512 // Double Immediate 3513 operand immDPR() %{ 3514 predicate(UseSSE<=1); 3515 match(ConD); 3516 3517 op_cost(5); 3518 format %{ %} 3519 interface(CONST_INTER); 3520 %} 3521 3522 operand immD() %{ 3523 predicate(UseSSE>=2); 3524 match(ConD); 3525 3526 op_cost(5); 3527 format %{ %} 3528 interface(CONST_INTER); 3529 %} 3530 3531 // Double Immediate zero 3532 operand immD0() %{ 3533 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3534 // bug that generates code such that NaNs compare equal to 0.0 AND do not 3535 // compare equal to -0.0. 3536 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); 3537 match(ConD); 3538 3539 format %{ %} 3540 interface(CONST_INTER); 3541 %} 3542 3543 // Float Immediate zero 3544 operand immFPR0() %{ 3545 predicate(UseSSE == 0 && n->getf() == 0.0F); 3546 match(ConF); 3547 3548 op_cost(5); 3549 format %{ %} 3550 interface(CONST_INTER); 3551 %} 3552 3553 // Float Immediate one 3554 operand immFPR1() %{ 3555 predicate(UseSSE == 0 && n->getf() == 1.0F); 3556 match(ConF); 3557 3558 op_cost(5); 3559 format %{ %} 3560 interface(CONST_INTER); 3561 %} 3562 3563 // Float Immediate 3564 operand immFPR() %{ 3565 predicate( UseSSE == 0 ); 3566 match(ConF); 3567 3568 op_cost(5); 3569 format %{ %} 3570 interface(CONST_INTER); 3571 %} 3572 3573 // Float Immediate 3574 operand immF() %{ 3575 predicate(UseSSE >= 1); 3576 match(ConF); 3577 3578 op_cost(5); 3579 format %{ %} 3580 interface(CONST_INTER); 3581 %} 3582 3583 // Float Immediate zero. 
Zero and not -0.0 3584 operand immF0() %{ 3585 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); 3586 match(ConF); 3587 3588 op_cost(5); 3589 format %{ %} 3590 interface(CONST_INTER); 3591 %} 3592 3593 // Immediates for special shifts (sign extend) 3594 3595 // Constants for increment 3596 operand immI_16() %{ 3597 predicate( n->get_int() == 16 ); 3598 match(ConI); 3599 3600 format %{ %} 3601 interface(CONST_INTER); 3602 %} 3603 3604 operand immI_24() %{ 3605 predicate( n->get_int() == 24 ); 3606 match(ConI); 3607 3608 format %{ %} 3609 interface(CONST_INTER); 3610 %} 3611 3612 // Constant for byte-wide masking 3613 operand immI_255() %{ 3614 predicate( n->get_int() == 255 ); 3615 match(ConI); 3616 3617 format %{ %} 3618 interface(CONST_INTER); 3619 %} 3620 3621 // Constant for short-wide masking 3622 operand immI_65535() %{ 3623 predicate(n->get_int() == 65535); 3624 match(ConI); 3625 3626 format %{ %} 3627 interface(CONST_INTER); 3628 %} 3629 3630 // Register Operands 3631 // Integer Register 3632 operand rRegI() %{ 3633 constraint(ALLOC_IN_RC(int_reg)); 3634 match(RegI); 3635 match(xRegI); 3636 match(eAXRegI); 3637 match(eBXRegI); 3638 match(eCXRegI); 3639 match(eDXRegI); 3640 match(eDIRegI); 3641 match(eSIRegI); 3642 3643 format %{ %} 3644 interface(REG_INTER); 3645 %} 3646 3647 // Subset of Integer Register 3648 operand xRegI(rRegI reg) %{ 3649 constraint(ALLOC_IN_RC(int_x_reg)); 3650 match(reg); 3651 match(eAXRegI); 3652 match(eBXRegI); 3653 match(eCXRegI); 3654 match(eDXRegI); 3655 3656 format %{ %} 3657 interface(REG_INTER); 3658 %} 3659 3660 // Special Registers 3661 operand eAXRegI(xRegI reg) %{ 3662 constraint(ALLOC_IN_RC(eax_reg)); 3663 match(reg); 3664 match(rRegI); 3665 3666 format %{ "EAX" %} 3667 interface(REG_INTER); 3668 %} 3669 3670 // Special Registers 3671 operand eBXRegI(xRegI reg) %{ 3672 constraint(ALLOC_IN_RC(ebx_reg)); 3673 match(reg); 3674 match(rRegI); 3675 3676 format %{ "EBX" %} 3677 interface(REG_INTER); 3678 %} 3679 3680 operand eCXRegI(xRegI reg) %{ 3681 constraint(ALLOC_IN_RC(ecx_reg)); 3682 match(reg); 3683 match(rRegI); 3684 3685 format %{ "ECX" %} 3686 interface(REG_INTER); 3687 %} 3688 3689 operand eDXRegI(xRegI reg) %{ 3690 constraint(ALLOC_IN_RC(edx_reg)); 3691 match(reg); 3692 match(rRegI); 3693 3694 format %{ "EDX" %} 3695 interface(REG_INTER); 3696 %} 3697 3698 operand eDIRegI(xRegI reg) %{ 3699 constraint(ALLOC_IN_RC(edi_reg)); 3700 match(reg); 3701 match(rRegI); 3702 3703 format %{ "EDI" %} 3704 interface(REG_INTER); 3705 %} 3706 3707 operand naxRegI() %{ 3708 constraint(ALLOC_IN_RC(nax_reg)); 3709 match(RegI); 3710 match(eCXRegI); 3711 match(eDXRegI); 3712 match(eSIRegI); 3713 match(eDIRegI); 3714 3715 format %{ %} 3716 interface(REG_INTER); 3717 %} 3718 3719 operand nadxRegI() %{ 3720 constraint(ALLOC_IN_RC(nadx_reg)); 3721 match(RegI); 3722 match(eBXRegI); 3723 match(eCXRegI); 3724 match(eSIRegI); 3725 match(eDIRegI); 3726 3727 format %{ %} 3728 interface(REG_INTER); 3729 %} 3730 3731 operand ncxRegI() %{ 3732 constraint(ALLOC_IN_RC(ncx_reg)); 3733 match(RegI); 3734 match(eAXRegI); 3735 match(eDXRegI); 3736 match(eSIRegI); 3737 match(eDIRegI); 3738 3739 format %{ %} 3740 interface(REG_INTER); 3741 %} 3742 3743 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3744 // // 3745 operand eSIRegI(xRegI reg) %{ 3746 constraint(ALLOC_IN_RC(esi_reg)); 3747 match(reg); 3748 match(rRegI); 3749 3750 format %{ "ESI" %} 3751 interface(REG_INTER); 3752 %} 3753 3754 // Pointer Register 3755 operand anyRegP() %{ 3756 
constraint(ALLOC_IN_RC(any_reg)); 3757 match(RegP); 3758 match(eAXRegP); 3759 match(eBXRegP); 3760 match(eCXRegP); 3761 match(eDIRegP); 3762 match(eRegP); 3763 3764 format %{ %} 3765 interface(REG_INTER); 3766 %} 3767 3768 operand eRegP() %{ 3769 constraint(ALLOC_IN_RC(int_reg)); 3770 match(RegP); 3771 match(eAXRegP); 3772 match(eBXRegP); 3773 match(eCXRegP); 3774 match(eDIRegP); 3775 3776 format %{ %} 3777 interface(REG_INTER); 3778 %} 3779 3780 // On windows95, EBP is not safe to use for implicit null tests. 3781 operand eRegP_no_EBP() %{ 3782 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3783 match(RegP); 3784 match(eAXRegP); 3785 match(eBXRegP); 3786 match(eCXRegP); 3787 match(eDIRegP); 3788 3789 op_cost(100); 3790 format %{ %} 3791 interface(REG_INTER); 3792 %} 3793 3794 operand naxRegP() %{ 3795 constraint(ALLOC_IN_RC(nax_reg)); 3796 match(RegP); 3797 match(eBXRegP); 3798 match(eDXRegP); 3799 match(eCXRegP); 3800 match(eSIRegP); 3801 match(eDIRegP); 3802 3803 format %{ %} 3804 interface(REG_INTER); 3805 %} 3806 3807 operand nabxRegP() %{ 3808 constraint(ALLOC_IN_RC(nabx_reg)); 3809 match(RegP); 3810 match(eCXRegP); 3811 match(eDXRegP); 3812 match(eSIRegP); 3813 match(eDIRegP); 3814 3815 format %{ %} 3816 interface(REG_INTER); 3817 %} 3818 3819 operand pRegP() %{ 3820 constraint(ALLOC_IN_RC(p_reg)); 3821 match(RegP); 3822 match(eBXRegP); 3823 match(eDXRegP); 3824 match(eSIRegP); 3825 match(eDIRegP); 3826 3827 format %{ %} 3828 interface(REG_INTER); 3829 %} 3830 3831 // Special Registers 3832 // Return a pointer value 3833 operand eAXRegP(eRegP reg) %{ 3834 constraint(ALLOC_IN_RC(eax_reg)); 3835 match(reg); 3836 format %{ "EAX" %} 3837 interface(REG_INTER); 3838 %} 3839 3840 // Used in AtomicAdd 3841 operand eBXRegP(eRegP reg) %{ 3842 constraint(ALLOC_IN_RC(ebx_reg)); 3843 match(reg); 3844 format %{ "EBX" %} 3845 interface(REG_INTER); 3846 %} 3847 3848 // Tail-call (interprocedural jump) to interpreter 3849 operand eCXRegP(eRegP reg) %{ 3850 constraint(ALLOC_IN_RC(ecx_reg)); 3851 match(reg); 3852 format %{ "ECX" %} 3853 interface(REG_INTER); 3854 %} 3855 3856 operand eSIRegP(eRegP reg) %{ 3857 constraint(ALLOC_IN_RC(esi_reg)); 3858 match(reg); 3859 format %{ "ESI" %} 3860 interface(REG_INTER); 3861 %} 3862 3863 // Used in rep stosw 3864 operand eDIRegP(eRegP reg) %{ 3865 constraint(ALLOC_IN_RC(edi_reg)); 3866 match(reg); 3867 format %{ "EDI" %} 3868 interface(REG_INTER); 3869 %} 3870 3871 operand eRegL() %{ 3872 constraint(ALLOC_IN_RC(long_reg)); 3873 match(RegL); 3874 match(eADXRegL); 3875 3876 format %{ %} 3877 interface(REG_INTER); 3878 %} 3879 3880 operand eADXRegL( eRegL reg ) %{ 3881 constraint(ALLOC_IN_RC(eadx_reg)); 3882 match(reg); 3883 3884 format %{ "EDX:EAX" %} 3885 interface(REG_INTER); 3886 %} 3887 3888 operand eBCXRegL( eRegL reg ) %{ 3889 constraint(ALLOC_IN_RC(ebcx_reg)); 3890 match(reg); 3891 3892 format %{ "EBX:ECX" %} 3893 interface(REG_INTER); 3894 %} 3895 3896 // Special case for integer high multiply 3897 operand eADXRegL_low_only() %{ 3898 constraint(ALLOC_IN_RC(eadx_reg)); 3899 match(RegL); 3900 3901 format %{ "EAX" %} 3902 interface(REG_INTER); 3903 %} 3904 3905 // Flags register, used as output of compare instructions 3906 operand eFlagsReg() %{ 3907 constraint(ALLOC_IN_RC(int_flags)); 3908 match(RegFlags); 3909 3910 format %{ "EFLAGS" %} 3911 interface(REG_INTER); 3912 %} 3913 3914 // Flags register, used as output of FLOATING POINT compare instructions 3915 operand eFlagsRegU() %{ 3916 constraint(ALLOC_IN_RC(int_flags)); 3917 match(RegFlags); 3918 3919 format %{ 
"EFLAGS_U" %} 3920 interface(REG_INTER); 3921 %} 3922 3923 operand eFlagsRegUCF() %{ 3924 constraint(ALLOC_IN_RC(int_flags)); 3925 match(RegFlags); 3926 predicate(false); 3927 3928 format %{ "EFLAGS_U_CF" %} 3929 interface(REG_INTER); 3930 %} 3931 3932 // Condition Code Register used by long compare 3933 operand flagsReg_long_LTGE() %{ 3934 constraint(ALLOC_IN_RC(int_flags)); 3935 match(RegFlags); 3936 format %{ "FLAGS_LTGE" %} 3937 interface(REG_INTER); 3938 %} 3939 operand flagsReg_long_EQNE() %{ 3940 constraint(ALLOC_IN_RC(int_flags)); 3941 match(RegFlags); 3942 format %{ "FLAGS_EQNE" %} 3943 interface(REG_INTER); 3944 %} 3945 operand flagsReg_long_LEGT() %{ 3946 constraint(ALLOC_IN_RC(int_flags)); 3947 match(RegFlags); 3948 format %{ "FLAGS_LEGT" %} 3949 interface(REG_INTER); 3950 %} 3951 3952 // Condition Code Register used by unsigned long compare 3953 operand flagsReg_ulong_LTGE() %{ 3954 constraint(ALLOC_IN_RC(int_flags)); 3955 match(RegFlags); 3956 format %{ "FLAGS_U_LTGE" %} 3957 interface(REG_INTER); 3958 %} 3959 operand flagsReg_ulong_EQNE() %{ 3960 constraint(ALLOC_IN_RC(int_flags)); 3961 match(RegFlags); 3962 format %{ "FLAGS_U_EQNE" %} 3963 interface(REG_INTER); 3964 %} 3965 operand flagsReg_ulong_LEGT() %{ 3966 constraint(ALLOC_IN_RC(int_flags)); 3967 match(RegFlags); 3968 format %{ "FLAGS_U_LEGT" %} 3969 interface(REG_INTER); 3970 %} 3971 3972 // Float register operands 3973 operand regDPR() %{ 3974 predicate( UseSSE < 2 ); 3975 constraint(ALLOC_IN_RC(fp_dbl_reg)); 3976 match(RegD); 3977 match(regDPR1); 3978 match(regDPR2); 3979 format %{ %} 3980 interface(REG_INTER); 3981 %} 3982 3983 operand regDPR1(regDPR reg) %{ 3984 predicate( UseSSE < 2 ); 3985 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 3986 match(reg); 3987 format %{ "FPR1" %} 3988 interface(REG_INTER); 3989 %} 3990 3991 operand regDPR2(regDPR reg) %{ 3992 predicate( UseSSE < 2 ); 3993 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 3994 match(reg); 3995 format %{ "FPR2" %} 3996 interface(REG_INTER); 3997 %} 3998 3999 operand regnotDPR1(regDPR reg) %{ 4000 predicate( UseSSE < 2 ); 4001 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4002 match(reg); 4003 format %{ %} 4004 interface(REG_INTER); 4005 %} 4006 4007 // Float register operands 4008 operand regFPR() %{ 4009 predicate( UseSSE < 2 ); 4010 constraint(ALLOC_IN_RC(fp_flt_reg)); 4011 match(RegF); 4012 match(regFPR1); 4013 format %{ %} 4014 interface(REG_INTER); 4015 %} 4016 4017 // Float register operands 4018 operand regFPR1(regFPR reg) %{ 4019 predicate( UseSSE < 2 ); 4020 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4021 match(reg); 4022 format %{ "FPR1" %} 4023 interface(REG_INTER); 4024 %} 4025 4026 // XMM Float register operands 4027 operand regF() %{ 4028 predicate( UseSSE>=1 ); 4029 constraint(ALLOC_IN_RC(float_reg)); 4030 match(RegF); 4031 format %{ %} 4032 interface(REG_INTER); 4033 %} 4034 4035 // XMM Double register operands 4036 operand regD() %{ 4037 predicate( UseSSE>=2 ); 4038 constraint(ALLOC_IN_RC(double_reg)); 4039 match(RegD); 4040 format %{ %} 4041 interface(REG_INTER); 4042 %} 4043 4044 4045 //----------Memory Operands---------------------------------------------------- 4046 // Direct Memory Operand 4047 operand direct(immP addr) %{ 4048 match(addr); 4049 4050 format %{ "[$addr]" %} 4051 interface(MEMORY_INTER) %{ 4052 base(0xFFFFFFFF); 4053 index(0x4); 4054 scale(0x0); 4055 disp($addr); 4056 %} 4057 %} 4058 4059 // Indirect Memory Operand 4060 operand indirect(eRegP reg) %{ 4061 constraint(ALLOC_IN_RC(int_reg)); 4062 match(reg); 4063 4064 format %{ "[$reg]" %} 4065 
interface(MEMORY_INTER) %{ 4066 base($reg); 4067 index(0x4); 4068 scale(0x0); 4069 disp(0x0); 4070 %} 4071 %} 4072 4073 // Indirect Memory Plus Short Offset Operand 4074 operand indOffset8(eRegP reg, immI8 off) %{ 4075 match(AddP reg off); 4076 4077 format %{ "[$reg + $off]" %} 4078 interface(MEMORY_INTER) %{ 4079 base($reg); 4080 index(0x4); 4081 scale(0x0); 4082 disp($off); 4083 %} 4084 %} 4085 4086 // Indirect Memory Plus Long Offset Operand 4087 operand indOffset32(eRegP reg, immI off) %{ 4088 match(AddP reg off); 4089 4090 format %{ "[$reg + $off]" %} 4091 interface(MEMORY_INTER) %{ 4092 base($reg); 4093 index(0x4); 4094 scale(0x0); 4095 disp($off); 4096 %} 4097 %} 4098 4099 // Indirect Memory Plus Long Offset Operand 4100 operand indOffset32X(rRegI reg, immP off) %{ 4101 match(AddP off reg); 4102 4103 format %{ "[$reg + $off]" %} 4104 interface(MEMORY_INTER) %{ 4105 base($reg); 4106 index(0x4); 4107 scale(0x0); 4108 disp($off); 4109 %} 4110 %} 4111 4112 // Indirect Memory Plus Index Register Plus Offset Operand 4113 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4114 match(AddP (AddP reg ireg) off); 4115 4116 op_cost(10); 4117 format %{"[$reg + $off + $ireg]" %} 4118 interface(MEMORY_INTER) %{ 4119 base($reg); 4120 index($ireg); 4121 scale(0x0); 4122 disp($off); 4123 %} 4124 %} 4125 4126 // Indirect Memory Plus Index Register Plus Offset Operand 4127 operand indIndex(eRegP reg, rRegI ireg) %{ 4128 match(AddP reg ireg); 4129 4130 op_cost(10); 4131 format %{"[$reg + $ireg]" %} 4132 interface(MEMORY_INTER) %{ 4133 base($reg); 4134 index($ireg); 4135 scale(0x0); 4136 disp(0x0); 4137 %} 4138 %} 4139 4140 // // ------------------------------------------------------------------------- 4141 // // 486 architecture doesn't support "scale * index + offset" with out a base 4142 // // ------------------------------------------------------------------------- 4143 // // Scaled Memory Operands 4144 // // Indirect Memory Times Scale Plus Offset Operand 4145 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4146 // match(AddP off (LShiftI ireg scale)); 4147 // 4148 // op_cost(10); 4149 // format %{"[$off + $ireg << $scale]" %} 4150 // interface(MEMORY_INTER) %{ 4151 // base(0x4); 4152 // index($ireg); 4153 // scale($scale); 4154 // disp($off); 4155 // %} 4156 // %} 4157 4158 // Indirect Memory Times Scale Plus Index Register 4159 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ 4160 match(AddP reg (LShiftI ireg scale)); 4161 4162 op_cost(10); 4163 format %{"[$reg + $ireg << $scale]" %} 4164 interface(MEMORY_INTER) %{ 4165 base($reg); 4166 index($ireg); 4167 scale($scale); 4168 disp(0x0); 4169 %} 4170 %} 4171 4172 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4173 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ 4174 match(AddP (AddP reg (LShiftI ireg scale)) off); 4175 4176 op_cost(10); 4177 format %{"[$reg + $off + $ireg << $scale]" %} 4178 interface(MEMORY_INTER) %{ 4179 base($reg); 4180 index($ireg); 4181 scale($scale); 4182 disp($off); 4183 %} 4184 %} 4185 4186 //----------Load Long Memory Operands------------------------------------------ 4187 // The load-long idiom will use it's address expression again after loading 4188 // the first word of the long. If the load-long destination overlaps with 4189 // registers used in the addressing expression, the 2nd half will be loaded 4190 // from a clobbered address. 
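// (For example, a hypothetical expansion that loaded the low word with
//  "MOV EAX,[EAX]" and then the high word with "MOV EDX,[EAX+4]" would perform
//  the second load through an EAX that no longer holds the address.)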
Fix this by requiring that load-long use 4191 // address registers that do not overlap with the load-long target. 4192 4193 // load-long support 4194 operand load_long_RegP() %{ 4195 constraint(ALLOC_IN_RC(esi_reg)); 4196 match(RegP); 4197 match(eSIRegP); 4198 op_cost(100); 4199 format %{ %} 4200 interface(REG_INTER); 4201 %} 4202 4203 // Indirect Memory Operand Long 4204 operand load_long_indirect(load_long_RegP reg) %{ 4205 constraint(ALLOC_IN_RC(esi_reg)); 4206 match(reg); 4207 4208 format %{ "[$reg]" %} 4209 interface(MEMORY_INTER) %{ 4210 base($reg); 4211 index(0x4); 4212 scale(0x0); 4213 disp(0x0); 4214 %} 4215 %} 4216 4217 // Indirect Memory Plus Long Offset Operand 4218 operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 4219 match(AddP reg off); 4220 4221 format %{ "[$reg + $off]" %} 4222 interface(MEMORY_INTER) %{ 4223 base($reg); 4224 index(0x4); 4225 scale(0x0); 4226 disp($off); 4227 %} 4228 %} 4229 4230 opclass load_long_memory(load_long_indirect, load_long_indOffset32); 4231 4232 4233 //----------Special Memory Operands-------------------------------------------- 4234 // Stack Slot Operand - This operand is used for loading and storing temporary 4235 // values on the stack where a match requires a value to 4236 // flow through memory. 4237 operand stackSlotP(sRegP reg) %{ 4238 constraint(ALLOC_IN_RC(stack_slots)); 4239 // No match rule because this operand is only generated in matching 4240 format %{ "[$reg]" %} 4241 interface(MEMORY_INTER) %{ 4242 base(0x4); // ESP 4243 index(0x4); // No Index 4244 scale(0x0); // No Scale 4245 disp($reg); // Stack Offset 4246 %} 4247 %} 4248 4249 operand stackSlotI(sRegI reg) %{ 4250 constraint(ALLOC_IN_RC(stack_slots)); 4251 // No match rule because this operand is only generated in matching 4252 format %{ "[$reg]" %} 4253 interface(MEMORY_INTER) %{ 4254 base(0x4); // ESP 4255 index(0x4); // No Index 4256 scale(0x0); // No Scale 4257 disp($reg); // Stack Offset 4258 %} 4259 %} 4260 4261 operand stackSlotF(sRegF reg) %{ 4262 constraint(ALLOC_IN_RC(stack_slots)); 4263 // No match rule because this operand is only generated in matching 4264 format %{ "[$reg]" %} 4265 interface(MEMORY_INTER) %{ 4266 base(0x4); // ESP 4267 index(0x4); // No Index 4268 scale(0x0); // No Scale 4269 disp($reg); // Stack Offset 4270 %} 4271 %} 4272 4273 operand stackSlotD(sRegD reg) %{ 4274 constraint(ALLOC_IN_RC(stack_slots)); 4275 // No match rule because this operand is only generated in matching 4276 format %{ "[$reg]" %} 4277 interface(MEMORY_INTER) %{ 4278 base(0x4); // ESP 4279 index(0x4); // No Index 4280 scale(0x0); // No Scale 4281 disp($reg); // Stack Offset 4282 %} 4283 %} 4284 4285 operand stackSlotL(sRegL reg) %{ 4286 constraint(ALLOC_IN_RC(stack_slots)); 4287 // No match rule because this operand is only generated in matching 4288 format %{ "[$reg]" %} 4289 interface(MEMORY_INTER) %{ 4290 base(0x4); // ESP 4291 index(0x4); // No Index 4292 scale(0x0); // No Scale 4293 disp($reg); // Stack Offset 4294 %} 4295 %} 4296 4297 //----------Memory Operands - Win95 Implicit Null Variants---------------- 4298 // Indirect Memory Operand 4299 operand indirect_win95_safe(eRegP_no_EBP reg) 4300 %{ 4301 constraint(ALLOC_IN_RC(int_reg)); 4302 match(reg); 4303 4304 op_cost(100); 4305 format %{ "[$reg]" %} 4306 interface(MEMORY_INTER) %{ 4307 base($reg); 4308 index(0x4); 4309 scale(0x0); 4310 disp(0x0); 4311 %} 4312 %} 4313 4314 // Indirect Memory Plus Short Offset Operand 4315 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off) 4316 %{ 4317 
match(AddP reg off); 4318 4319 op_cost(100); 4320 format %{ "[$reg + $off]" %} 4321 interface(MEMORY_INTER) %{ 4322 base($reg); 4323 index(0x4); 4324 scale(0x0); 4325 disp($off); 4326 %} 4327 %} 4328 4329 // Indirect Memory Plus Long Offset Operand 4330 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off) 4331 %{ 4332 match(AddP reg off); 4333 4334 op_cost(100); 4335 format %{ "[$reg + $off]" %} 4336 interface(MEMORY_INTER) %{ 4337 base($reg); 4338 index(0x4); 4339 scale(0x0); 4340 disp($off); 4341 %} 4342 %} 4343 4344 // Indirect Memory Plus Index Register Plus Offset Operand 4345 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off) 4346 %{ 4347 match(AddP (AddP reg ireg) off); 4348 4349 op_cost(100); 4350 format %{"[$reg + $off + $ireg]" %} 4351 interface(MEMORY_INTER) %{ 4352 base($reg); 4353 index($ireg); 4354 scale(0x0); 4355 disp($off); 4356 %} 4357 %} 4358 4359 // Indirect Memory Times Scale Plus Index Register 4360 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale) 4361 %{ 4362 match(AddP reg (LShiftI ireg scale)); 4363 4364 op_cost(100); 4365 format %{"[$reg + $ireg << $scale]" %} 4366 interface(MEMORY_INTER) %{ 4367 base($reg); 4368 index($ireg); 4369 scale($scale); 4370 disp(0x0); 4371 %} 4372 %} 4373 4374 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4375 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale) 4376 %{ 4377 match(AddP (AddP reg (LShiftI ireg scale)) off); 4378 4379 op_cost(100); 4380 format %{"[$reg + $off + $ireg << $scale]" %} 4381 interface(MEMORY_INTER) %{ 4382 base($reg); 4383 index($ireg); 4384 scale($scale); 4385 disp($off); 4386 %} 4387 %} 4388 4389 //----------Conditional Branch Operands---------------------------------------- 4390 // Comparison Op - This is the operation of the comparison, and is limited to 4391 // the following set of codes: 4392 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) 4393 // 4394 // Other attributes of the comparison, such as unsignedness, are specified 4395 // by the comparison instruction that sets a condition code flags register. 4396 // That result is represented by a flags operand whose subtype is appropriate 4397 // to the unsignedness (etc.) of the comparison. 4398 // 4399 // Later, the instruction which matches both the Comparison Op (a Bool) and 4400 // the flags (produced by the Cmp) specifies the coding of the comparison op 4401 // by matching a specific subtype of Bool operand below, such as cmpOpU. 4402 4403 // Comparision Code 4404 operand cmpOp() %{ 4405 match(Bool); 4406 4407 format %{ "" %} 4408 interface(COND_INTER) %{ 4409 equal(0x4, "e"); 4410 not_equal(0x5, "ne"); 4411 less(0xC, "l"); 4412 greater_equal(0xD, "ge"); 4413 less_equal(0xE, "le"); 4414 greater(0xF, "g"); 4415 overflow(0x0, "o"); 4416 no_overflow(0x1, "no"); 4417 %} 4418 %} 4419 4420 // Comparison Code, unsigned compare. Used by FP also, with 4421 // C2 (unordered) turned into GT or LT already. The other bits 4422 // C0 and C3 are turned into Carry & Zero flags. 
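// (Editorial example: the hex values in these COND_INTER blocks are x86
//  condition-code nibbles.  A branch built from cmpOp's equal(0x4) becomes
//  JE (0x74 short form, 0x0F 0x84 near form), while cmpOpU's less(0x2) becomes
//  JB (0x72 / 0x0F 0x82), i.e. the unsigned "below" test.)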
4423 operand cmpOpU() %{ 4424 match(Bool); 4425 4426 format %{ "" %} 4427 interface(COND_INTER) %{ 4428 equal(0x4, "e"); 4429 not_equal(0x5, "ne"); 4430 less(0x2, "b"); 4431 greater_equal(0x3, "nb"); 4432 less_equal(0x6, "be"); 4433 greater(0x7, "nbe"); 4434 overflow(0x0, "o"); 4435 no_overflow(0x1, "no"); 4436 %} 4437 %} 4438 4439 // Floating comparisons that don't require any fixup for the unordered case 4440 operand cmpOpUCF() %{ 4441 match(Bool); 4442 predicate(n->as_Bool()->_test._test == BoolTest::lt || 4443 n->as_Bool()->_test._test == BoolTest::ge || 4444 n->as_Bool()->_test._test == BoolTest::le || 4445 n->as_Bool()->_test._test == BoolTest::gt); 4446 format %{ "" %} 4447 interface(COND_INTER) %{ 4448 equal(0x4, "e"); 4449 not_equal(0x5, "ne"); 4450 less(0x2, "b"); 4451 greater_equal(0x3, "nb"); 4452 less_equal(0x6, "be"); 4453 greater(0x7, "nbe"); 4454 overflow(0x0, "o"); 4455 no_overflow(0x1, "no"); 4456 %} 4457 %} 4458 4459 4460 // Floating comparisons that can be fixed up with extra conditional jumps 4461 operand cmpOpUCF2() %{ 4462 match(Bool); 4463 predicate(n->as_Bool()->_test._test == BoolTest::ne || 4464 n->as_Bool()->_test._test == BoolTest::eq); 4465 format %{ "" %} 4466 interface(COND_INTER) %{ 4467 equal(0x4, "e"); 4468 not_equal(0x5, "ne"); 4469 less(0x2, "b"); 4470 greater_equal(0x3, "nb"); 4471 less_equal(0x6, "be"); 4472 greater(0x7, "nbe"); 4473 overflow(0x0, "o"); 4474 no_overflow(0x1, "no"); 4475 %} 4476 %} 4477 4478 // Comparison Code for FP conditional move 4479 operand cmpOp_fcmov() %{ 4480 match(Bool); 4481 4482 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 4483 n->as_Bool()->_test._test != BoolTest::no_overflow); 4484 format %{ "" %} 4485 interface(COND_INTER) %{ 4486 equal (0x0C8); 4487 not_equal (0x1C8); 4488 less (0x0C0); 4489 greater_equal(0x1C0); 4490 less_equal (0x0D0); 4491 greater (0x1D0); 4492 overflow(0x0, "o"); // not really supported by the instruction 4493 no_overflow(0x1, "no"); // not really supported by the instruction 4494 %} 4495 %} 4496 4497 // Comparison Code used in long compares 4498 operand cmpOp_commute() %{ 4499 match(Bool); 4500 4501 format %{ "" %} 4502 interface(COND_INTER) %{ 4503 equal(0x4, "e"); 4504 not_equal(0x5, "ne"); 4505 less(0xF, "g"); 4506 greater_equal(0xE, "le"); 4507 less_equal(0xD, "ge"); 4508 greater(0xC, "l"); 4509 overflow(0x0, "o"); 4510 no_overflow(0x1, "no"); 4511 %} 4512 %} 4513 4514 // Comparison Code used in unsigned long compares 4515 operand cmpOpU_commute() %{ 4516 match(Bool); 4517 4518 format %{ "" %} 4519 interface(COND_INTER) %{ 4520 equal(0x4, "e"); 4521 not_equal(0x5, "ne"); 4522 less(0x7, "nbe"); 4523 greater_equal(0x6, "be"); 4524 less_equal(0x3, "nb"); 4525 greater(0x2, "b"); 4526 overflow(0x0, "o"); 4527 no_overflow(0x1, "no"); 4528 %} 4529 %} 4530 4531 //----------OPERAND CLASSES---------------------------------------------------- 4532 // Operand Classes are groups of operands that are used as to simplify 4533 // instruction definitions by not requiring the AD writer to specify separate 4534 // instructions for every form of operand when the instruction accepts 4535 // multiple operand types with the same basic encoding and format. The classic 4536 // case of this is memory operands. 4537 4538 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, 4539 indIndex, indIndexScale, indIndexScaleOffset); 4540 4541 // Long memory operations are encoded in 2 instructions and a +4 offset. 
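// (Editorial gloss: the low 32 bits are accessed at [base+disp] and the high
//  32 bits at [base+disp+4], so the displacement must be a plain integer that
//  4 can be added to.)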
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable-sized instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU ops, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.
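// (Editorial reading of the first class below, ialu_reg, as a worked example:
//  "dst : S4(write)" and "dst : S3(read)" say the destination is read in stage
//  S3 and written in stage S4, "DECODE : S0" claims any one of the three
//  decoders in stage S0, and "ALU : S3" claims either ALU in stage S3; the
//  scheduler uses these claims to pack up to three instructions per bundle.)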
4593 4594 // Integer ALU reg operation 4595 pipe_class ialu_reg(rRegI dst) %{ 4596 single_instruction; 4597 dst : S4(write); 4598 dst : S3(read); 4599 DECODE : S0; // any decoder 4600 ALU : S3; // any alu 4601 %} 4602 4603 // Long ALU reg operation 4604 pipe_class ialu_reg_long(eRegL dst) %{ 4605 instruction_count(2); 4606 dst : S4(write); 4607 dst : S3(read); 4608 DECODE : S0(2); // any 2 decoders 4609 ALU : S3(2); // both alus 4610 %} 4611 4612 // Integer ALU reg operation using big decoder 4613 pipe_class ialu_reg_fat(rRegI dst) %{ 4614 single_instruction; 4615 dst : S4(write); 4616 dst : S3(read); 4617 D0 : S0; // big decoder only 4618 ALU : S3; // any alu 4619 %} 4620 4621 // Long ALU reg operation using big decoder 4622 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4623 instruction_count(2); 4624 dst : S4(write); 4625 dst : S3(read); 4626 D0 : S0(2); // big decoder only; twice 4627 ALU : S3(2); // any 2 alus 4628 %} 4629 4630 // Integer ALU reg-reg operation 4631 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4632 single_instruction; 4633 dst : S4(write); 4634 src : S3(read); 4635 DECODE : S0; // any decoder 4636 ALU : S3; // any alu 4637 %} 4638 4639 // Long ALU reg-reg operation 4640 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4641 instruction_count(2); 4642 dst : S4(write); 4643 src : S3(read); 4644 DECODE : S0(2); // any 2 decoders 4645 ALU : S3(2); // both alus 4646 %} 4647 4648 // Integer ALU reg-reg operation 4649 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4650 single_instruction; 4651 dst : S4(write); 4652 src : S3(read); 4653 D0 : S0; // big decoder only 4654 ALU : S3; // any alu 4655 %} 4656 4657 // Long ALU reg-reg operation 4658 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4659 instruction_count(2); 4660 dst : S4(write); 4661 src : S3(read); 4662 D0 : S0(2); // big decoder only; twice 4663 ALU : S3(2); // both alus 4664 %} 4665 4666 // Integer ALU reg-mem operation 4667 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{ 4668 single_instruction; 4669 dst : S5(write); 4670 mem : S3(read); 4671 D0 : S0; // big decoder only 4672 ALU : S4; // any alu 4673 MEM : S3; // any mem 4674 %} 4675 4676 // Long ALU reg-mem operation 4677 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4678 instruction_count(2); 4679 dst : S5(write); 4680 mem : S3(read); 4681 D0 : S0(2); // big decoder only; twice 4682 ALU : S4(2); // any 2 alus 4683 MEM : S3(2); // both mems 4684 %} 4685 4686 // Integer mem operation (prefetch) 4687 pipe_class ialu_mem(memory mem) 4688 %{ 4689 single_instruction; 4690 mem : S3(read); 4691 D0 : S0; // big decoder only 4692 MEM : S3; // any mem 4693 %} 4694 4695 // Integer Store to Memory 4696 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4697 single_instruction; 4698 mem : S3(read); 4699 src : S5(read); 4700 D0 : S0; // big decoder only 4701 ALU : S4; // any alu 4702 MEM : S3; 4703 %} 4704 4705 // Long Store to Memory 4706 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4707 instruction_count(2); 4708 mem : S3(read); 4709 src : S5(read); 4710 D0 : S0(2); // big decoder only; twice 4711 ALU : S4(2); // any 2 alus 4712 MEM : S3(2); // Both mems 4713 %} 4714 4715 // Integer Store to Memory 4716 pipe_class ialu_mem_imm(memory mem) %{ 4717 single_instruction; 4718 mem : S3(read); 4719 D0 : S0; // big decoder only 4720 ALU : S4; // any alu 4721 MEM : S3; 4722 %} 4723 4724 // Integer ALU0 reg-reg operation 4725 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4726 single_instruction; 4727 dst : S4(write); 4728 src 
: S3(read); 4729 D0 : S0; // Big decoder only 4730 ALU0 : S3; // only alu0 4731 %} 4732 4733 // Integer ALU0 reg-mem operation 4734 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4735 single_instruction; 4736 dst : S5(write); 4737 mem : S3(read); 4738 D0 : S0; // big decoder only 4739 ALU0 : S4; // ALU0 only 4740 MEM : S3; // any mem 4741 %} 4742 4743 // Integer ALU reg-reg operation 4744 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{ 4745 single_instruction; 4746 cr : S4(write); 4747 src1 : S3(read); 4748 src2 : S3(read); 4749 DECODE : S0; // any decoder 4750 ALU : S3; // any alu 4751 %} 4752 4753 // Integer ALU reg-imm operation 4754 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4755 single_instruction; 4756 cr : S4(write); 4757 src1 : S3(read); 4758 DECODE : S0; // any decoder 4759 ALU : S3; // any alu 4760 %} 4761 4762 // Integer ALU reg-mem operation 4763 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4764 single_instruction; 4765 cr : S4(write); 4766 src1 : S3(read); 4767 src2 : S3(read); 4768 D0 : S0; // big decoder only 4769 ALU : S4; // any alu 4770 MEM : S3; 4771 %} 4772 4773 // Conditional move reg-reg 4774 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4775 instruction_count(4); 4776 y : S4(read); 4777 q : S3(read); 4778 p : S3(read); 4779 DECODE : S0(4); // any decoder 4780 %} 4781 4782 // Conditional move reg-reg 4783 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4784 single_instruction; 4785 dst : S4(write); 4786 src : S3(read); 4787 cr : S3(read); 4788 DECODE : S0; // any decoder 4789 %} 4790 4791 // Conditional move reg-mem 4792 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4793 single_instruction; 4794 dst : S4(write); 4795 src : S3(read); 4796 cr : S3(read); 4797 DECODE : S0; // any decoder 4798 MEM : S3; 4799 %} 4800 4801 // Conditional move reg-reg long 4802 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4803 single_instruction; 4804 dst : S4(write); 4805 src : S3(read); 4806 cr : S3(read); 4807 DECODE : S0(2); // any 2 decoders 4808 %} 4809 4810 // Conditional move double reg-reg 4811 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4812 single_instruction; 4813 dst : S4(write); 4814 src : S3(read); 4815 cr : S3(read); 4816 DECODE : S0; // any decoder 4817 %} 4818 4819 // Float reg-reg operation 4820 pipe_class fpu_reg(regDPR dst) %{ 4821 instruction_count(2); 4822 dst : S3(read); 4823 DECODE : S0(2); // any 2 decoders 4824 FPU : S3; 4825 %} 4826 4827 // Float reg-reg operation 4828 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 4829 instruction_count(2); 4830 dst : S4(write); 4831 src : S3(read); 4832 DECODE : S0(2); // any 2 decoders 4833 FPU : S3; 4834 %} 4835 4836 // Float reg-reg operation 4837 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 4838 instruction_count(3); 4839 dst : S4(write); 4840 src1 : S3(read); 4841 src2 : S3(read); 4842 DECODE : S0(3); // any 3 decoders 4843 FPU : S3(2); 4844 %} 4845 4846 // Float reg-reg operation 4847 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 4848 instruction_count(4); 4849 dst : S4(write); 4850 src1 : S3(read); 4851 src2 : S3(read); 4852 src3 : S3(read); 4853 DECODE : S0(4); // any 3 decoders 4854 FPU : S3(2); 4855 %} 4856 4857 // Float reg-reg operation 4858 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 4859 instruction_count(4); 4860 dst : S4(write); 4861 src1 : 
S3(read); 4862 src2 : S3(read); 4863 src3 : S3(read); 4864 DECODE : S1(3); // any 3 decoders 4865 D0 : S0; // Big decoder only 4866 FPU : S3(2); 4867 MEM : S3; 4868 %} 4869 4870 // Float reg-mem operation 4871 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 4872 instruction_count(2); 4873 dst : S5(write); 4874 mem : S3(read); 4875 D0 : S0; // big decoder only 4876 DECODE : S1; // any decoder for FPU POP 4877 FPU : S4; 4878 MEM : S3; // any mem 4879 %} 4880 4881 // Float reg-mem operation 4882 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 4883 instruction_count(3); 4884 dst : S5(write); 4885 src1 : S3(read); 4886 mem : S3(read); 4887 D0 : S0; // big decoder only 4888 DECODE : S1(2); // any decoder for FPU POP 4889 FPU : S4; 4890 MEM : S3; // any mem 4891 %} 4892 4893 // Float mem-reg operation 4894 pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 4895 instruction_count(2); 4896 src : S5(read); 4897 mem : S3(read); 4898 DECODE : S0; // any decoder for FPU PUSH 4899 D0 : S1; // big decoder only 4900 FPU : S4; 4901 MEM : S3; // any mem 4902 %} 4903 4904 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4905 instruction_count(3); 4906 src1 : S3(read); 4907 src2 : S3(read); 4908 mem : S3(read); 4909 DECODE : S0(2); // any decoder for FPU PUSH 4910 D0 : S1; // big decoder only 4911 FPU : S4; 4912 MEM : S3; // any mem 4913 %} 4914 4915 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4916 instruction_count(3); 4917 src1 : S3(read); 4918 src2 : S3(read); 4919 mem : S4(read); 4920 DECODE : S0; // any decoder for FPU PUSH 4921 D0 : S0(2); // big decoder only 4922 FPU : S4; 4923 MEM : S3(2); // any mem 4924 %} 4925 4926 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4927 instruction_count(2); 4928 src1 : S3(read); 4929 dst : S4(read); 4930 D0 : S0(2); // big decoder only 4931 MEM : S3(2); // any mem 4932 %} 4933 4934 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4935 instruction_count(3); 4936 src1 : S3(read); 4937 src2 : S3(read); 4938 dst : S4(read); 4939 D0 : S0(3); // big decoder only 4940 FPU : S4; 4941 MEM : S3(3); // any mem 4942 %} 4943 4944 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4945 instruction_count(3); 4946 src1 : S4(read); 4947 mem : S4(read); 4948 DECODE : S0; // any decoder for FPU PUSH 4949 D0 : S0(2); // big decoder only 4950 FPU : S4; 4951 MEM : S3(2); // any mem 4952 %} 4953 4954 // Float load constant 4955 pipe_class fpu_reg_con(regDPR dst) %{ 4956 instruction_count(2); 4957 dst : S5(write); 4958 D0 : S0; // big decoder only for the load 4959 DECODE : S1; // any decoder for FPU POP 4960 FPU : S4; 4961 MEM : S3; // any mem 4962 %} 4963 4964 // Float load constant 4965 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 4966 instruction_count(3); 4967 dst : S5(write); 4968 src : S3(read); 4969 D0 : S0; // big decoder only for the load 4970 DECODE : S1(2); // any decoder for FPU POP 4971 FPU : S4; 4972 MEM : S3; // any mem 4973 %} 4974 4975 // UnConditional branch 4976 pipe_class pipe_jmp( label labl ) %{ 4977 single_instruction; 4978 BR : S3; 4979 %} 4980 4981 // Conditional branch 4982 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 4983 single_instruction; 4984 cr : S1(read); 4985 BR : S3; 4986 %} 4987 4988 // Allocation idiom 4989 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 4990 instruction_count(1); force_serialization; 4991 fixed_latency(6); 4992 heap_ptr : S3(read); 4993 DECODE : S0(3); 4994 D0 : S2; 4995 MEM : S3; 4996 ALU : S3(2); 4997 dst : 
S5(write); 4998 BR : S5; 4999 %} 5000 5001 // Generic big/slow expanded idiom 5002 pipe_class pipe_slow( ) %{ 5003 instruction_count(10); multiple_bundles; force_serialization; 5004 fixed_latency(100); 5005 D0 : S0(2); 5006 MEM : S3(2); 5007 %} 5008 5009 // The real do-nothing guy 5010 pipe_class empty( ) %{ 5011 instruction_count(0); 5012 %} 5013 5014 // Define the class for the Nop node 5015 define %{ 5016 MachNop = empty; 5017 %} 5018 5019 %} 5020 5021 //----------INSTRUCTIONS------------------------------------------------------- 5022 // 5023 // match -- States which machine-independent subtree may be replaced 5024 // by this instruction. 5025 // ins_cost -- The estimated cost of this instruction is used by instruction 5026 // selection to identify a minimum cost tree of machine 5027 // instructions that matches a tree of machine-independent 5028 // instructions. 5029 // format -- A string providing the disassembly for this instruction. 5030 // The value of an instruction's operand may be inserted 5031 // by referring to it with a '$' prefix. 5032 // opcode -- Three instruction opcodes may be provided. These are referred 5033 // to within an encode class as $primary, $secondary, and $tertiary 5034 // respectively. The primary opcode is commonly used to 5035 // indicate the type of machine instruction, while secondary 5036 // and tertiary are often used for prefix options or addressing 5037 // modes. 5038 // ins_encode -- A list of encode classes with parameters. The encode class 5039 // name must have been defined in an 'enc_class' specification 5040 // in the encode section of the architecture description. 5041 5042 //----------BSWAP-Instruction-------------------------------------------------- 5043 instruct bytes_reverse_int(rRegI dst) %{ 5044 match(Set dst (ReverseBytesI dst)); 5045 5046 format %{ "BSWAP $dst" %} 5047 opcode(0x0F, 0xC8); 5048 ins_encode( OpcP, OpcSReg(dst) ); 5049 ins_pipe( ialu_reg ); 5050 %} 5051 5052 instruct bytes_reverse_long(eRegL dst) %{ 5053 match(Set dst (ReverseBytesL dst)); 5054 5055 format %{ "BSWAP $dst.lo\n\t" 5056 "BSWAP $dst.hi\n\t" 5057 "XCHG $dst.lo $dst.hi" %} 5058 5059 ins_cost(125); 5060 ins_encode( bswap_long_bytes(dst) ); 5061 ins_pipe( ialu_reg_reg); 5062 %} 5063 5064 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5065 match(Set dst (ReverseBytesUS dst)); 5066 effect(KILL cr); 5067 5068 format %{ "BSWAP $dst\n\t" 5069 "SHR $dst,16\n\t" %} 5070 ins_encode %{ 5071 __ bswapl($dst$$Register); 5072 __ shrl($dst$$Register, 16); 5073 %} 5074 ins_pipe( ialu_reg ); 5075 %} 5076 5077 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5078 match(Set dst (ReverseBytesS dst)); 5079 effect(KILL cr); 5080 5081 format %{ "BSWAP $dst\n\t" 5082 "SAR $dst,16\n\t" %} 5083 ins_encode %{ 5084 __ bswapl($dst$$Register); 5085 __ sarl($dst$$Register, 16); 5086 %} 5087 ins_pipe( ialu_reg ); 5088 %} 5089 5090 5091 //---------- Zeros Count Instructions ------------------------------------------ 5092 5093 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5094 predicate(UseCountLeadingZerosInstruction); 5095 match(Set dst (CountLeadingZerosI src)); 5096 effect(KILL cr); 5097 5098 format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %} 5099 ins_encode %{ 5100 __ lzcntl($dst$$Register, $src$$Register); 5101 %} 5102 ins_pipe(ialu_reg); 5103 %} 5104 5105 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5106 predicate(!UseCountLeadingZerosInstruction); 5107 match(Set dst (CountLeadingZerosI src)); 5108 
effect(KILL cr); 5109 5110 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5111 "JNZ skip\n\t" 5112 "MOV $dst, -1\n" 5113 "skip:\n\t" 5114 "NEG $dst\n\t" 5115 "ADD $dst, 31" %} 5116 ins_encode %{ 5117 Register Rdst = $dst$$Register; 5118 Register Rsrc = $src$$Register; 5119 Label skip; 5120 __ bsrl(Rdst, Rsrc); 5121 __ jccb(Assembler::notZero, skip); 5122 __ movl(Rdst, -1); 5123 __ bind(skip); 5124 __ negl(Rdst); 5125 __ addl(Rdst, BitsPerInt - 1); 5126 %} 5127 ins_pipe(ialu_reg); 5128 %} 5129 5130 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5131 predicate(UseCountLeadingZerosInstruction); 5132 match(Set dst (CountLeadingZerosL src)); 5133 effect(TEMP dst, KILL cr); 5134 5135 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5136 "JNC done\n\t" 5137 "LZCNT $dst, $src.lo\n\t" 5138 "ADD $dst, 32\n" 5139 "done:" %} 5140 ins_encode %{ 5141 Register Rdst = $dst$$Register; 5142 Register Rsrc = $src$$Register; 5143 Label done; 5144 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5145 __ jccb(Assembler::carryClear, done); 5146 __ lzcntl(Rdst, Rsrc); 5147 __ addl(Rdst, BitsPerInt); 5148 __ bind(done); 5149 %} 5150 ins_pipe(ialu_reg); 5151 %} 5152 5153 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5154 predicate(!UseCountLeadingZerosInstruction); 5155 match(Set dst (CountLeadingZerosL src)); 5156 effect(TEMP dst, KILL cr); 5157 5158 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5159 "JZ msw_is_zero\n\t" 5160 "ADD $dst, 32\n\t" 5161 "JMP not_zero\n" 5162 "msw_is_zero:\n\t" 5163 "BSR $dst, $src.lo\n\t" 5164 "JNZ not_zero\n\t" 5165 "MOV $dst, -1\n" 5166 "not_zero:\n\t" 5167 "NEG $dst\n\t" 5168 "ADD $dst, 63\n" %} 5169 ins_encode %{ 5170 Register Rdst = $dst$$Register; 5171 Register Rsrc = $src$$Register; 5172 Label msw_is_zero; 5173 Label not_zero; 5174 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5175 __ jccb(Assembler::zero, msw_is_zero); 5176 __ addl(Rdst, BitsPerInt); 5177 __ jmpb(not_zero); 5178 __ bind(msw_is_zero); 5179 __ bsrl(Rdst, Rsrc); 5180 __ jccb(Assembler::notZero, not_zero); 5181 __ movl(Rdst, -1); 5182 __ bind(not_zero); 5183 __ negl(Rdst); 5184 __ addl(Rdst, BitsPerLong - 1); 5185 %} 5186 ins_pipe(ialu_reg); 5187 %} 5188 5189 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5190 predicate(UseCountTrailingZerosInstruction); 5191 match(Set dst (CountTrailingZerosI src)); 5192 effect(KILL cr); 5193 5194 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5195 ins_encode %{ 5196 __ tzcntl($dst$$Register, $src$$Register); 5197 %} 5198 ins_pipe(ialu_reg); 5199 %} 5200 5201 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5202 predicate(!UseCountTrailingZerosInstruction); 5203 match(Set dst (CountTrailingZerosI src)); 5204 effect(KILL cr); 5205 5206 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5207 "JNZ done\n\t" 5208 "MOV $dst, 32\n" 5209 "done:" %} 5210 ins_encode %{ 5211 Register Rdst = $dst$$Register; 5212 Label done; 5213 __ bsfl(Rdst, $src$$Register); 5214 __ jccb(Assembler::notZero, done); 5215 __ movl(Rdst, BitsPerInt); 5216 __ bind(done); 5217 %} 5218 ins_pipe(ialu_reg); 5219 %} 5220 5221 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5222 predicate(UseCountTrailingZerosInstruction); 5223 match(Set dst (CountTrailingZerosL src)); 5224 effect(TEMP dst, KILL cr); 5225 5226 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5227 "JNC done\n\t" 5228 "TZCNT $dst, $src.hi\n\t" 5229 "ADD $dst, 32\n" 5230 "done:" 
%} 5231 ins_encode %{ 5232 Register Rdst = $dst$$Register; 5233 Register Rsrc = $src$$Register; 5234 Label done; 5235 __ tzcntl(Rdst, Rsrc); 5236 __ jccb(Assembler::carryClear, done); 5237 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5238 __ addl(Rdst, BitsPerInt); 5239 __ bind(done); 5240 %} 5241 ins_pipe(ialu_reg); 5242 %} 5243 5244 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5245 predicate(!UseCountTrailingZerosInstruction); 5246 match(Set dst (CountTrailingZerosL src)); 5247 effect(TEMP dst, KILL cr); 5248 5249 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5250 "JNZ done\n\t" 5251 "BSF $dst, $src.hi\n\t" 5252 "JNZ msw_not_zero\n\t" 5253 "MOV $dst, 32\n" 5254 "msw_not_zero:\n\t" 5255 "ADD $dst, 32\n" 5256 "done:" %} 5257 ins_encode %{ 5258 Register Rdst = $dst$$Register; 5259 Register Rsrc = $src$$Register; 5260 Label msw_not_zero; 5261 Label done; 5262 __ bsfl(Rdst, Rsrc); 5263 __ jccb(Assembler::notZero, done); 5264 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5265 __ jccb(Assembler::notZero, msw_not_zero); 5266 __ movl(Rdst, BitsPerInt); 5267 __ bind(msw_not_zero); 5268 __ addl(Rdst, BitsPerInt); 5269 __ bind(done); 5270 %} 5271 ins_pipe(ialu_reg); 5272 %} 5273 5274 5275 //---------- Population Count Instructions ------------------------------------- 5276 5277 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5278 predicate(UsePopCountInstruction); 5279 match(Set dst (PopCountI src)); 5280 effect(KILL cr); 5281 5282 format %{ "POPCNT $dst, $src" %} 5283 ins_encode %{ 5284 __ popcntl($dst$$Register, $src$$Register); 5285 %} 5286 ins_pipe(ialu_reg); 5287 %} 5288 5289 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5290 predicate(UsePopCountInstruction); 5291 match(Set dst (PopCountI (LoadI mem))); 5292 effect(KILL cr); 5293 5294 format %{ "POPCNT $dst, $mem" %} 5295 ins_encode %{ 5296 __ popcntl($dst$$Register, $mem$$Address); 5297 %} 5298 ins_pipe(ialu_reg); 5299 %} 5300 5301 // Note: Long.bitCount(long) returns an int. 5302 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 5303 predicate(UsePopCountInstruction); 5304 match(Set dst (PopCountL src)); 5305 effect(KILL cr, TEMP tmp, TEMP dst); 5306 5307 format %{ "POPCNT $dst, $src.lo\n\t" 5308 "POPCNT $tmp, $src.hi\n\t" 5309 "ADD $dst, $tmp" %} 5310 ins_encode %{ 5311 __ popcntl($dst$$Register, $src$$Register); 5312 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 5313 __ addl($dst$$Register, $tmp$$Register); 5314 %} 5315 ins_pipe(ialu_reg); 5316 %} 5317 5318 // Note: Long.bitCount(long) returns an int. 
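// (Editorial sketch of the idea behind popCountL and popCountL_mem: a 64-bit
//  population count is simply popcount(lo) + popcount(hi); the maximum result
//  is 64, so the sum always fits in the 32-bit int that Long.bitCount returns.)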
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
5462 %} 5463 5464 ins_pipe(ialu_reg_mem); 5465 %} 5466 5467 // Load Unsigned Short/Char (16bit unsigned) 5468 instruct loadUS(rRegI dst, memory mem) %{ 5469 match(Set dst (LoadUS mem)); 5470 5471 ins_cost(125); 5472 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5473 5474 ins_encode %{ 5475 __ movzwl($dst$$Register, $mem$$Address); 5476 %} 5477 5478 ins_pipe(ialu_reg_mem); 5479 %} 5480 5481 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5482 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5483 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5484 5485 ins_cost(125); 5486 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5487 ins_encode %{ 5488 __ movsbl($dst$$Register, $mem$$Address); 5489 %} 5490 ins_pipe(ialu_reg_mem); 5491 %} 5492 5493 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5494 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5495 match(Set dst (ConvI2L (LoadUS mem))); 5496 effect(KILL cr); 5497 5498 ins_cost(250); 5499 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5500 "XOR $dst.hi,$dst.hi" %} 5501 5502 ins_encode %{ 5503 __ movzwl($dst$$Register, $mem$$Address); 5504 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5505 %} 5506 5507 ins_pipe(ialu_reg_mem); 5508 %} 5509 5510 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5511 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5512 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5513 effect(KILL cr); 5514 5515 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5516 "XOR $dst.hi,$dst.hi" %} 5517 ins_encode %{ 5518 Register Rdst = $dst$$Register; 5519 __ movzbl(Rdst, $mem$$Address); 5520 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5521 %} 5522 ins_pipe(ialu_reg_mem); 5523 %} 5524 5525 // Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register 5526 instruct loadUS2L_immI16(eRegL dst, memory mem, immI16 mask, eFlagsReg cr) %{ 5527 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5528 effect(KILL cr); 5529 5530 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t" 5531 "XOR $dst.hi,$dst.hi\n\t" 5532 "AND $dst.lo,$mask" %} 5533 ins_encode %{ 5534 Register Rdst = $dst$$Register; 5535 __ movzwl(Rdst, $mem$$Address); 5536 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5537 __ andl(Rdst, $mask$$constant); 5538 %} 5539 ins_pipe(ialu_reg_mem); 5540 %} 5541 5542 // Load Integer 5543 instruct loadI(rRegI dst, memory mem) %{ 5544 match(Set dst (LoadI mem)); 5545 5546 ins_cost(125); 5547 format %{ "MOV $dst,$mem\t# int" %} 5548 5549 ins_encode %{ 5550 __ movl($dst$$Register, $mem$$Address); 5551 %} 5552 5553 ins_pipe(ialu_reg_mem); 5554 %} 5555 5556 // Load Integer (32 bit signed) to Byte (8 bit signed) 5557 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5558 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5559 5560 ins_cost(125); 5561 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5562 ins_encode %{ 5563 __ movsbl($dst$$Register, $mem$$Address); 5564 %} 5565 ins_pipe(ialu_reg_mem); 5566 %} 5567 5568 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5569 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5570 match(Set dst (AndI (LoadI mem) mask)); 5571 5572 ins_cost(125); 5573 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5574 ins_encode %{ 5575 __ movzbl($dst$$Register, $mem$$Address); 5576 %} 5577 
ins_pipe(ialu_reg_mem); 5578 %} 5579 5580 // Load Integer (32 bit signed) to Short (16 bit signed) 5581 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5582 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5583 5584 ins_cost(125); 5585 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5586 ins_encode %{ 5587 __ movswl($dst$$Register, $mem$$Address); 5588 %} 5589 ins_pipe(ialu_reg_mem); 5590 %} 5591 5592 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5593 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5594 match(Set dst (AndI (LoadI mem) mask)); 5595 5596 ins_cost(125); 5597 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5598 ins_encode %{ 5599 __ movzwl($dst$$Register, $mem$$Address); 5600 %} 5601 ins_pipe(ialu_reg_mem); 5602 %} 5603 5604 // Load Integer into Long Register 5605 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5606 match(Set dst (ConvI2L (LoadI mem))); 5607 effect(KILL cr); 5608 5609 ins_cost(375); 5610 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5611 "MOV $dst.hi,$dst.lo\n\t" 5612 "SAR $dst.hi,31" %} 5613 5614 ins_encode %{ 5615 __ movl($dst$$Register, $mem$$Address); 5616 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5617 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5618 %} 5619 5620 ins_pipe(ialu_reg_mem); 5621 %} 5622 5623 // Load Integer with mask 0xFF into Long Register 5624 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5625 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5626 effect(KILL cr); 5627 5628 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5629 "XOR $dst.hi,$dst.hi" %} 5630 ins_encode %{ 5631 Register Rdst = $dst$$Register; 5632 __ movzbl(Rdst, $mem$$Address); 5633 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5634 %} 5635 ins_pipe(ialu_reg_mem); 5636 %} 5637 5638 // Load Integer with mask 0xFFFF into Long Register 5639 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5640 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5641 effect(KILL cr); 5642 5643 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5644 "XOR $dst.hi,$dst.hi" %} 5645 ins_encode %{ 5646 Register Rdst = $dst$$Register; 5647 __ movzwl(Rdst, $mem$$Address); 5648 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5649 %} 5650 ins_pipe(ialu_reg_mem); 5651 %} 5652 5653 // Load Integer with 31-bit mask into Long Register 5654 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{ 5655 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5656 effect(KILL cr); 5657 5658 format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t" 5659 "XOR $dst.hi,$dst.hi\n\t" 5660 "AND $dst.lo,$mask" %} 5661 ins_encode %{ 5662 Register Rdst = $dst$$Register; 5663 __ movl(Rdst, $mem$$Address); 5664 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5665 __ andl(Rdst, $mask$$constant); 5666 %} 5667 ins_pipe(ialu_reg_mem); 5668 %} 5669 5670 // Load Unsigned Integer into Long Register 5671 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{ 5672 match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); 5673 effect(KILL cr); 5674 5675 ins_cost(250); 5676 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" 5677 "XOR $dst.hi,$dst.hi" %} 5678 5679 ins_encode %{ 5680 __ movl($dst$$Register, $mem$$Address); 5681 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5682 %} 5683 5684 ins_pipe(ialu_reg_mem); 5685 %} 5686 5687 // Load Long. 
Cannot clobber address while loading, so restrict address 5688 // register to ESI 5689 instruct loadL(eRegL dst, load_long_memory mem) %{ 5690 predicate(!((LoadLNode*)n)->require_atomic_access()); 5691 match(Set dst (LoadL mem)); 5692 5693 ins_cost(250); 5694 format %{ "MOV $dst.lo,$mem\t# long\n\t" 5695 "MOV $dst.hi,$mem+4" %} 5696 5697 ins_encode %{ 5698 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); 5699 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); 5700 __ movl($dst$$Register, Amemlo); 5701 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); 5702 %} 5703 5704 ins_pipe(ialu_reg_long_mem); 5705 %} 5706 5707 // Volatile Load Long. Must be atomic, so do 64-bit FILD 5708 // then store it down to the stack and reload on the int 5709 // side. 5710 instruct loadL_volatile(stackSlotL dst, memory mem) %{ 5711 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); 5712 match(Set dst (LoadL mem)); 5713 5714 ins_cost(200); 5715 format %{ "FILD $mem\t# Atomic volatile long load\n\t" 5716 "FISTp $dst" %} 5717 ins_encode(enc_loadL_volatile(mem,dst)); 5718 ins_pipe( fpu_reg_mem ); 5719 %} 5720 5721 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ 5722 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5723 match(Set dst (LoadL mem)); 5724 effect(TEMP tmp); 5725 ins_cost(180); 5726 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5727 "MOVSD $dst,$tmp" %} 5728 ins_encode %{ 5729 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5730 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 5731 %} 5732 ins_pipe( pipe_slow ); 5733 %} 5734 5735 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ 5736 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5737 match(Set dst (LoadL mem)); 5738 effect(TEMP tmp); 5739 ins_cost(160); 5740 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5741 "MOVD $dst.lo,$tmp\n\t" 5742 "PSRLQ $tmp,32\n\t" 5743 "MOVD $dst.hi,$tmp" %} 5744 ins_encode %{ 5745 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5746 __ movdl($dst$$Register, $tmp$$XMMRegister); 5747 __ psrlq($tmp$$XMMRegister, 32); 5748 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 5749 %} 5750 ins_pipe( pipe_slow ); 5751 %} 5752 5753 // Load Range 5754 instruct loadRange(rRegI dst, memory mem) %{ 5755 match(Set dst (LoadRange mem)); 5756 5757 ins_cost(125); 5758 format %{ "MOV $dst,$mem" %} 5759 opcode(0x8B); 5760 ins_encode( OpcP, RegMem(dst,mem)); 5761 ins_pipe( ialu_reg_mem ); 5762 %} 5763 5764 5765 // Load Pointer 5766 instruct loadP(eRegP dst, memory mem) %{ 5767 match(Set dst (LoadP mem)); 5768 5769 ins_cost(125); 5770 format %{ "MOV $dst,$mem" %} 5771 opcode(0x8B); 5772 ins_encode( OpcP, RegMem(dst,mem)); 5773 ins_pipe( ialu_reg_mem ); 5774 %} 5775 5776 // Load Klass Pointer 5777 instruct loadKlass(eRegP dst, memory mem) %{ 5778 match(Set dst (LoadKlass mem)); 5779 5780 ins_cost(125); 5781 format %{ "MOV $dst,$mem" %} 5782 opcode(0x8B); 5783 ins_encode( OpcP, RegMem(dst,mem)); 5784 ins_pipe( ialu_reg_mem ); 5785 %} 5786 5787 // Load Double 5788 instruct loadDPR(regDPR dst, memory mem) %{ 5789 predicate(UseSSE<=1); 5790 match(Set dst (LoadD mem)); 5791 5792 ins_cost(150); 5793 format %{ "FLD_D ST,$mem\n\t" 5794 "FSTP $dst" %} 5795 opcode(0xDD); /* DD /0 */ 5796 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5797 Pop_Reg_DPR(dst) ); 5798 ins_pipe( fpu_reg_mem ); 5799 %} 5800 5801 // Load Double to XMM 5802 instruct 
loadD(regD dst, memory mem) %{ 5803 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 5804 match(Set dst (LoadD mem)); 5805 ins_cost(145); 5806 format %{ "MOVSD $dst,$mem" %} 5807 ins_encode %{ 5808 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5809 %} 5810 ins_pipe( pipe_slow ); 5811 %} 5812 5813 instruct loadD_partial(regD dst, memory mem) %{ 5814 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 5815 match(Set dst (LoadD mem)); 5816 ins_cost(145); 5817 format %{ "MOVLPD $dst,$mem" %} 5818 ins_encode %{ 5819 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5820 %} 5821 ins_pipe( pipe_slow ); 5822 %} 5823 5824 // Load to XMM register (single-precision floating point) 5825 // MOVSS instruction 5826 instruct loadF(regF dst, memory mem) %{ 5827 predicate(UseSSE>=1); 5828 match(Set dst (LoadF mem)); 5829 ins_cost(145); 5830 format %{ "MOVSS $dst,$mem" %} 5831 ins_encode %{ 5832 __ movflt ($dst$$XMMRegister, $mem$$Address); 5833 %} 5834 ins_pipe( pipe_slow ); 5835 %} 5836 5837 // Load Float 5838 instruct loadFPR(regFPR dst, memory mem) %{ 5839 predicate(UseSSE==0); 5840 match(Set dst (LoadF mem)); 5841 5842 ins_cost(150); 5843 format %{ "FLD_S ST,$mem\n\t" 5844 "FSTP $dst" %} 5845 opcode(0xD9); /* D9 /0 */ 5846 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5847 Pop_Reg_FPR(dst) ); 5848 ins_pipe( fpu_reg_mem ); 5849 %} 5850 5851 // Load Effective Address 5852 instruct leaP8(eRegP dst, indOffset8 mem) %{ 5853 match(Set dst mem); 5854 5855 ins_cost(110); 5856 format %{ "LEA $dst,$mem" %} 5857 opcode(0x8D); 5858 ins_encode( OpcP, RegMem(dst,mem)); 5859 ins_pipe( ialu_reg_reg_fat ); 5860 %} 5861 5862 instruct leaP32(eRegP dst, indOffset32 mem) %{ 5863 match(Set dst mem); 5864 5865 ins_cost(110); 5866 format %{ "LEA $dst,$mem" %} 5867 opcode(0x8D); 5868 ins_encode( OpcP, RegMem(dst,mem)); 5869 ins_pipe( ialu_reg_reg_fat ); 5870 %} 5871 5872 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{ 5873 match(Set dst mem); 5874 5875 ins_cost(110); 5876 format %{ "LEA $dst,$mem" %} 5877 opcode(0x8D); 5878 ins_encode( OpcP, RegMem(dst,mem)); 5879 ins_pipe( ialu_reg_reg_fat ); 5880 %} 5881 5882 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{ 5883 match(Set dst mem); 5884 5885 ins_cost(110); 5886 format %{ "LEA $dst,$mem" %} 5887 opcode(0x8D); 5888 ins_encode( OpcP, RegMem(dst,mem)); 5889 ins_pipe( ialu_reg_reg_fat ); 5890 %} 5891 5892 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{ 5893 match(Set dst mem); 5894 5895 ins_cost(110); 5896 format %{ "LEA $dst,$mem" %} 5897 opcode(0x8D); 5898 ins_encode( OpcP, RegMem(dst,mem)); 5899 ins_pipe( ialu_reg_reg_fat ); 5900 %} 5901 5902 // Load Constant 5903 instruct loadConI(rRegI dst, immI src) %{ 5904 match(Set dst src); 5905 5906 format %{ "MOV $dst,$src" %} 5907 ins_encode( LdImmI(dst, src) ); 5908 ins_pipe( ialu_reg_fat ); 5909 %} 5910 5911 // Load Constant zero 5912 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{ 5913 match(Set dst src); 5914 effect(KILL cr); 5915 5916 ins_cost(50); 5917 format %{ "XOR $dst,$dst" %} 5918 opcode(0x33); /* + rd */ 5919 ins_encode( OpcP, RegReg( dst, dst ) ); 5920 ins_pipe( ialu_reg ); 5921 %} 5922 5923 instruct loadConP(eRegP dst, immP src) %{ 5924 match(Set dst src); 5925 5926 format %{ "MOV $dst,$src" %} 5927 opcode(0xB8); /* + rd */ 5928 ins_encode( LdImmP(dst, src) ); 5929 ins_pipe( ialu_reg_fat ); 5930 %} 5931 5932 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{ 5933 match(Set dst src); 5934 effect(KILL cr); 5935 ins_cost(200); 5936 format %{ "MOV $dst.lo,$src.lo\n\t" 5937 "MOV $dst.hi,$src.hi" 
%} 5938 opcode(0xB8); 5939 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); 5940 ins_pipe( ialu_reg_long_fat ); 5941 %} 5942 5943 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{ 5944 match(Set dst src); 5945 effect(KILL cr); 5946 ins_cost(150); 5947 format %{ "XOR $dst.lo,$dst.lo\n\t" 5948 "XOR $dst.hi,$dst.hi" %} 5949 opcode(0x33,0x33); 5950 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); 5951 ins_pipe( ialu_reg_long ); 5952 %} 5953 5954 // The instruction usage is guarded by predicate in operand immFPR(). 5955 instruct loadConFPR(regFPR dst, immFPR con) %{ 5956 match(Set dst con); 5957 ins_cost(125); 5958 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 5959 "FSTP $dst" %} 5960 ins_encode %{ 5961 __ fld_s($constantaddress($con)); 5962 __ fstp_d($dst$$reg); 5963 %} 5964 ins_pipe(fpu_reg_con); 5965 %} 5966 5967 // The instruction usage is guarded by predicate in operand immFPR0(). 5968 instruct loadConFPR0(regFPR dst, immFPR0 con) %{ 5969 match(Set dst con); 5970 ins_cost(125); 5971 format %{ "FLDZ ST\n\t" 5972 "FSTP $dst" %} 5973 ins_encode %{ 5974 __ fldz(); 5975 __ fstp_d($dst$$reg); 5976 %} 5977 ins_pipe(fpu_reg_con); 5978 %} 5979 5980 // The instruction usage is guarded by predicate in operand immFPR1(). 5981 instruct loadConFPR1(regFPR dst, immFPR1 con) %{ 5982 match(Set dst con); 5983 ins_cost(125); 5984 format %{ "FLD1 ST\n\t" 5985 "FSTP $dst" %} 5986 ins_encode %{ 5987 __ fld1(); 5988 __ fstp_d($dst$$reg); 5989 %} 5990 ins_pipe(fpu_reg_con); 5991 %} 5992 5993 // The instruction usage is guarded by predicate in operand immF(). 5994 instruct loadConF(regF dst, immF con) %{ 5995 match(Set dst con); 5996 ins_cost(125); 5997 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 5998 ins_encode %{ 5999 __ movflt($dst$$XMMRegister, $constantaddress($con)); 6000 %} 6001 ins_pipe(pipe_slow); 6002 %} 6003 6004 // The instruction usage is guarded by predicate in operand immF0(). 6005 instruct loadConF0(regF dst, immF0 src) %{ 6006 match(Set dst src); 6007 ins_cost(100); 6008 format %{ "XORPS $dst,$dst\t# float 0.0" %} 6009 ins_encode %{ 6010 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 6011 %} 6012 ins_pipe(pipe_slow); 6013 %} 6014 6015 // The instruction usage is guarded by predicate in operand immDPR(). 6016 instruct loadConDPR(regDPR dst, immDPR con) %{ 6017 match(Set dst con); 6018 ins_cost(125); 6019 6020 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 6021 "FSTP $dst" %} 6022 ins_encode %{ 6023 __ fld_d($constantaddress($con)); 6024 __ fstp_d($dst$$reg); 6025 %} 6026 ins_pipe(fpu_reg_con); 6027 %} 6028 6029 // The instruction usage is guarded by predicate in operand immDPR0(). 6030 instruct loadConDPR0(regDPR dst, immDPR0 con) %{ 6031 match(Set dst con); 6032 ins_cost(125); 6033 6034 format %{ "FLDZ ST\n\t" 6035 "FSTP $dst" %} 6036 ins_encode %{ 6037 __ fldz(); 6038 __ fstp_d($dst$$reg); 6039 %} 6040 ins_pipe(fpu_reg_con); 6041 %} 6042 6043 // The instruction usage is guarded by predicate in operand immDPR1(). 6044 instruct loadConDPR1(regDPR dst, immDPR1 con) %{ 6045 match(Set dst con); 6046 ins_cost(125); 6047 6048 format %{ "FLD1 ST\n\t" 6049 "FSTP $dst" %} 6050 ins_encode %{ 6051 __ fld1(); 6052 __ fstp_d($dst$$reg); 6053 %} 6054 ins_pipe(fpu_reg_con); 6055 %} 6056 6057 // The instruction usage is guarded by predicate in operand immD(). 
6058 instruct loadConD(regD dst, immD con) %{ 6059 match(Set dst con); 6060 ins_cost(125); 6061 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 6062 ins_encode %{ 6063 __ movdbl($dst$$XMMRegister, $constantaddress($con)); 6064 %} 6065 ins_pipe(pipe_slow); 6066 %} 6067 6068 // The instruction usage is guarded by predicate in operand immD0(). 6069 instruct loadConD0(regD dst, immD0 src) %{ 6070 match(Set dst src); 6071 ins_cost(100); 6072 format %{ "XORPD $dst,$dst\t# double 0.0" %} 6073 ins_encode %{ 6074 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); 6075 %} 6076 ins_pipe( pipe_slow ); 6077 %} 6078 6079 // Load Stack Slot 6080 instruct loadSSI(rRegI dst, stackSlotI src) %{ 6081 match(Set dst src); 6082 ins_cost(125); 6083 6084 format %{ "MOV $dst,$src" %} 6085 opcode(0x8B); 6086 ins_encode( OpcP, RegMem(dst,src)); 6087 ins_pipe( ialu_reg_mem ); 6088 %} 6089 6090 instruct loadSSL(eRegL dst, stackSlotL src) %{ 6091 match(Set dst src); 6092 6093 ins_cost(200); 6094 format %{ "MOV $dst,$src.lo\n\t" 6095 "MOV $dst+4,$src.hi" %} 6096 opcode(0x8B, 0x8B); 6097 ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) ); 6098 ins_pipe( ialu_mem_long_reg ); 6099 %} 6100 6101 // Load Stack Slot 6102 instruct loadSSP(eRegP dst, stackSlotP src) %{ 6103 match(Set dst src); 6104 ins_cost(125); 6105 6106 format %{ "MOV $dst,$src" %} 6107 opcode(0x8B); 6108 ins_encode( OpcP, RegMem(dst,src)); 6109 ins_pipe( ialu_reg_mem ); 6110 %} 6111 6112 // Load Stack Slot 6113 instruct loadSSF(regFPR dst, stackSlotF src) %{ 6114 match(Set dst src); 6115 ins_cost(125); 6116 6117 format %{ "FLD_S $src\n\t" 6118 "FSTP $dst" %} 6119 opcode(0xD9); /* D9 /0, FLD m32real */ 6120 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6121 Pop_Reg_FPR(dst) ); 6122 ins_pipe( fpu_reg_mem ); 6123 %} 6124 6125 // Load Stack Slot 6126 instruct loadSSD(regDPR dst, stackSlotD src) %{ 6127 match(Set dst src); 6128 ins_cost(125); 6129 6130 format %{ "FLD_D $src\n\t" 6131 "FSTP $dst" %} 6132 opcode(0xDD); /* DD /0, FLD m64real */ 6133 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6134 Pop_Reg_DPR(dst) ); 6135 ins_pipe( fpu_reg_mem ); 6136 %} 6137 6138 // Prefetch instructions. 6139 // Must be safe to execute with invalid address (cannot fault). 6140 6141 instruct prefetchr0( memory mem ) %{ 6142 predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch()); 6143 match(PrefetchRead mem); 6144 ins_cost(0); 6145 size(0); 6146 format %{ "PREFETCHR (non-SSE is empty encoding)" %} 6147 ins_encode(); 6148 ins_pipe(empty); 6149 %} 6150 6151 instruct prefetchr( memory mem ) %{ 6152 predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch() || ReadPrefetchInstr==3); 6153 match(PrefetchRead mem); 6154 ins_cost(100); 6155 6156 format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %} 6157 ins_encode %{ 6158 __ prefetchr($mem$$Address); 6159 %} 6160 ins_pipe(ialu_mem); 6161 %} 6162 6163 instruct prefetchrNTA( memory mem ) %{ 6164 predicate(UseSSE>=1 && ReadPrefetchInstr==0); 6165 match(PrefetchRead mem); 6166 ins_cost(100); 6167 6168 format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %} 6169 ins_encode %{ 6170 __ prefetchnta($mem$$Address); 6171 %} 6172 ins_pipe(ialu_mem); 6173 %} 6174 6175 instruct prefetchrT0( memory mem ) %{ 6176 predicate(UseSSE>=1 && ReadPrefetchInstr==1); 6177 match(PrefetchRead mem); 6178 ins_cost(100); 6179 6180 format %{ "PREFETCHT0 $mem\t! 
Prefetch into L1 and L2 caches for read" %} 6181 ins_encode %{ 6182 __ prefetcht0($mem$$Address); 6183 %} 6184 ins_pipe(ialu_mem); 6185 %} 6186 6187 instruct prefetchrT2( memory mem ) %{ 6188 predicate(UseSSE>=1 && ReadPrefetchInstr==2); 6189 match(PrefetchRead mem); 6190 ins_cost(100); 6191 6192 format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %} 6193 ins_encode %{ 6194 __ prefetcht2($mem$$Address); 6195 %} 6196 ins_pipe(ialu_mem); 6197 %} 6198 6199 instruct prefetchw0( memory mem ) %{ 6200 predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch()); 6201 match(PrefetchWrite mem); 6202 ins_cost(0); 6203 size(0); 6204 format %{ "Prefetch (non-SSE is empty encoding)" %} 6205 ins_encode(); 6206 ins_pipe(empty); 6207 %} 6208 6209 instruct prefetchw( memory mem ) %{ 6210 predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch()); 6211 match( PrefetchWrite mem ); 6212 ins_cost(100); 6213 6214 format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %} 6215 ins_encode %{ 6216 __ prefetchw($mem$$Address); 6217 %} 6218 ins_pipe(ialu_mem); 6219 %} 6220 6221 instruct prefetchwNTA( memory mem ) %{ 6222 predicate(UseSSE>=1); 6223 match(PrefetchWrite mem); 6224 ins_cost(100); 6225 6226 format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %} 6227 ins_encode %{ 6228 __ prefetchnta($mem$$Address); 6229 %} 6230 ins_pipe(ialu_mem); 6231 %} 6232 6233 // Prefetch instructions for allocation. 6234 6235 instruct prefetchAlloc0( memory mem ) %{ 6236 predicate(UseSSE==0 && AllocatePrefetchInstr!=3); 6237 match(PrefetchAllocation mem); 6238 ins_cost(0); 6239 size(0); 6240 format %{ "Prefetch allocation (non-SSE is empty encoding)" %} 6241 ins_encode(); 6242 ins_pipe(empty); 6243 %} 6244 6245 instruct prefetchAlloc( memory mem ) %{ 6246 predicate(AllocatePrefetchInstr==3); 6247 match( PrefetchAllocation mem ); 6248 ins_cost(100); 6249 6250 format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %} 6251 ins_encode %{ 6252 __ prefetchw($mem$$Address); 6253 %} 6254 ins_pipe(ialu_mem); 6255 %} 6256 6257 instruct prefetchAllocNTA( memory mem ) %{ 6258 predicate(UseSSE>=1 && AllocatePrefetchInstr==0); 6259 match(PrefetchAllocation mem); 6260 ins_cost(100); 6261 6262 format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %} 6263 ins_encode %{ 6264 __ prefetchnta($mem$$Address); 6265 %} 6266 ins_pipe(ialu_mem); 6267 %} 6268 6269 instruct prefetchAllocT0( memory mem ) %{ 6270 predicate(UseSSE>=1 && AllocatePrefetchInstr==1); 6271 match(PrefetchAllocation mem); 6272 ins_cost(100); 6273 6274 format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %} 6275 ins_encode %{ 6276 __ prefetcht0($mem$$Address); 6277 %} 6278 ins_pipe(ialu_mem); 6279 %} 6280 6281 instruct prefetchAllocT2( memory mem ) %{ 6282 predicate(UseSSE>=1 && AllocatePrefetchInstr==2); 6283 match(PrefetchAllocation mem); 6284 ins_cost(100); 6285 6286 format %{ "PREFETCHT2 $mem\t! 
Prefetch allocation into L2 cache for write" %} 6287 ins_encode %{ 6288 __ prefetcht2($mem$$Address); 6289 %} 6290 ins_pipe(ialu_mem); 6291 %} 6292 6293 //----------Store Instructions------------------------------------------------- 6294 6295 // Store Byte 6296 instruct storeB(memory mem, xRegI src) %{ 6297 match(Set mem (StoreB mem src)); 6298 6299 ins_cost(125); 6300 format %{ "MOV8 $mem,$src" %} 6301 opcode(0x88); 6302 ins_encode( OpcP, RegMem( src, mem ) ); 6303 ins_pipe( ialu_mem_reg ); 6304 %} 6305 6306 // Store Char/Short 6307 instruct storeC(memory mem, rRegI src) %{ 6308 match(Set mem (StoreC mem src)); 6309 6310 ins_cost(125); 6311 format %{ "MOV16 $mem,$src" %} 6312 opcode(0x89, 0x66); 6313 ins_encode( OpcS, OpcP, RegMem( src, mem ) ); 6314 ins_pipe( ialu_mem_reg ); 6315 %} 6316 6317 // Store Integer 6318 instruct storeI(memory mem, rRegI src) %{ 6319 match(Set mem (StoreI mem src)); 6320 6321 ins_cost(125); 6322 format %{ "MOV $mem,$src" %} 6323 opcode(0x89); 6324 ins_encode( OpcP, RegMem( src, mem ) ); 6325 ins_pipe( ialu_mem_reg ); 6326 %} 6327 6328 // Store Long 6329 instruct storeL(long_memory mem, eRegL src) %{ 6330 predicate(!((StoreLNode*)n)->require_atomic_access()); 6331 match(Set mem (StoreL mem src)); 6332 6333 ins_cost(200); 6334 format %{ "MOV $mem,$src.lo\n\t" 6335 "MOV $mem+4,$src.hi" %} 6336 opcode(0x89, 0x89); 6337 ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) ); 6338 ins_pipe( ialu_mem_long_reg ); 6339 %} 6340 6341 // Store Long to Integer 6342 instruct storeL2I(memory mem, eRegL src) %{ 6343 match(Set mem (StoreI mem (ConvL2I src))); 6344 6345 format %{ "MOV $mem,$src.lo\t# long -> int" %} 6346 ins_encode %{ 6347 __ movl($mem$$Address, $src$$Register); 6348 %} 6349 ins_pipe(ialu_mem_reg); 6350 %} 6351 6352 // Volatile Store Long. Must be atomic, so move it into 6353 // the FP TOS and then do a 64-bit FIST. Has to probe the 6354 // target address before the store (for null-ptr checks) 6355 // so the memory operand is used twice in the encoding. 
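// [Illustrative comment only.] The atomicity requirement exists because a
// plain 64-bit long store on 32-bit x86 is emitted as two 32-bit MOVs, so a
// concurrent reader could observe a mix of the old and new halves. A C++
// sketch of the tearing the volatile forms below must avoid (hypothetical
// helper, illustration only; assumes <cstdint>):
//
//   static void torn_store(volatile uint32_t* p, uint64_t v) {
//     p[0] = (uint32_t) v;         // a reader between these two stores sees
//     p[1] = (uint32_t)(v >> 32);  // the new low half with the old high half
//   }
//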
6356 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{ 6357 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access()); 6358 match(Set mem (StoreL mem src)); 6359 effect( KILL cr ); 6360 ins_cost(400); 6361 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6362 "FILD $src\n\t" 6363 "FISTp $mem\t # 64-bit atomic volatile long store" %} 6364 opcode(0x3B); 6365 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src)); 6366 ins_pipe( fpu_reg_mem ); 6367 %} 6368 6369 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{ 6370 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 6371 match(Set mem (StoreL mem src)); 6372 effect( TEMP tmp, KILL cr ); 6373 ins_cost(380); 6374 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6375 "MOVSD $tmp,$src\n\t" 6376 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 6377 ins_encode %{ 6378 __ cmpl(rax, $mem$$Address); 6379 __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp)); 6380 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6381 %} 6382 ins_pipe( pipe_slow ); 6383 %} 6384 6385 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{ 6386 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 6387 match(Set mem (StoreL mem src)); 6388 effect( TEMP tmp2 , TEMP tmp, KILL cr ); 6389 ins_cost(360); 6390 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6391 "MOVD $tmp,$src.lo\n\t" 6392 "MOVD $tmp2,$src.hi\n\t" 6393 "PUNPCKLDQ $tmp,$tmp2\n\t" 6394 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 6395 ins_encode %{ 6396 __ cmpl(rax, $mem$$Address); 6397 __ movdl($tmp$$XMMRegister, $src$$Register); 6398 __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 6399 __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister); 6400 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6401 %} 6402 ins_pipe( pipe_slow ); 6403 %} 6404 6405 // Store Pointer; for storing unknown oops and raw pointers 6406 instruct storeP(memory mem, anyRegP src) %{ 6407 match(Set mem (StoreP mem src)); 6408 6409 ins_cost(125); 6410 format %{ "MOV $mem,$src" %} 6411 opcode(0x89); 6412 ins_encode( OpcP, RegMem( src, mem ) ); 6413 ins_pipe( ialu_mem_reg ); 6414 %} 6415 6416 // Store Integer Immediate 6417 instruct storeImmI(memory mem, immI src) %{ 6418 match(Set mem (StoreI mem src)); 6419 6420 ins_cost(150); 6421 format %{ "MOV $mem,$src" %} 6422 opcode(0xC7); /* C7 /0 */ 6423 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 6424 ins_pipe( ialu_mem_imm ); 6425 %} 6426 6427 // Store Short/Char Immediate 6428 instruct storeImmI16(memory mem, immI16 src) %{ 6429 predicate(UseStoreImmI16); 6430 match(Set mem (StoreC mem src)); 6431 6432 ins_cost(150); 6433 format %{ "MOV16 $mem,$src" %} 6434 opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */ 6435 ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src )); 6436 ins_pipe( ialu_mem_imm ); 6437 %} 6438 6439 // Store Pointer Immediate; null pointers or constant oops that do not 6440 // need card-mark barriers. 
6441 instruct storeImmP(memory mem, immP src) %{ 6442 match(Set mem (StoreP mem src)); 6443 6444 ins_cost(150); 6445 format %{ "MOV $mem,$src" %} 6446 opcode(0xC7); /* C7 /0 */ 6447 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 6448 ins_pipe( ialu_mem_imm ); 6449 %} 6450 6451 // Store Byte Immediate 6452 instruct storeImmB(memory mem, immI8 src) %{ 6453 match(Set mem (StoreB mem src)); 6454 6455 ins_cost(150); 6456 format %{ "MOV8 $mem,$src" %} 6457 opcode(0xC6); /* C6 /0 */ 6458 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 6459 ins_pipe( ialu_mem_imm ); 6460 %} 6461 6462 // Store CMS card-mark Immediate 6463 instruct storeImmCM(memory mem, immI8 src) %{ 6464 match(Set mem (StoreCM mem src)); 6465 6466 ins_cost(150); 6467 format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %} 6468 opcode(0xC6); /* C6 /0 */ 6469 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 6470 ins_pipe( ialu_mem_imm ); 6471 %} 6472 6473 // Store Double 6474 instruct storeDPR( memory mem, regDPR1 src) %{ 6475 predicate(UseSSE<=1); 6476 match(Set mem (StoreD mem src)); 6477 6478 ins_cost(100); 6479 format %{ "FST_D $mem,$src" %} 6480 opcode(0xDD); /* DD /2 */ 6481 ins_encode( enc_FPR_store(mem,src) ); 6482 ins_pipe( fpu_mem_reg ); 6483 %} 6484 6485 // Store double does rounding on x86 6486 instruct storeDPR_rounded( memory mem, regDPR1 src) %{ 6487 predicate(UseSSE<=1); 6488 match(Set mem (StoreD mem (RoundDouble src))); 6489 6490 ins_cost(100); 6491 format %{ "FST_D $mem,$src\t# round" %} 6492 opcode(0xDD); /* DD /2 */ 6493 ins_encode( enc_FPR_store(mem,src) ); 6494 ins_pipe( fpu_mem_reg ); 6495 %} 6496 6497 // Store XMM register to memory (double-precision floating points) 6498 // MOVSD instruction 6499 instruct storeD(memory mem, regD src) %{ 6500 predicate(UseSSE>=2); 6501 match(Set mem (StoreD mem src)); 6502 ins_cost(95); 6503 format %{ "MOVSD $mem,$src" %} 6504 ins_encode %{ 6505 __ movdbl($mem$$Address, $src$$XMMRegister); 6506 %} 6507 ins_pipe( pipe_slow ); 6508 %} 6509 6510 // Store XMM register to memory (single-precision floating point) 6511 // MOVSS instruction 6512 instruct storeF(memory mem, regF src) %{ 6513 predicate(UseSSE>=1); 6514 match(Set mem (StoreF mem src)); 6515 ins_cost(95); 6516 format %{ "MOVSS $mem,$src" %} 6517 ins_encode %{ 6518 __ movflt($mem$$Address, $src$$XMMRegister); 6519 %} 6520 ins_pipe( pipe_slow ); 6521 %} 6522 6523 // Store Float 6524 instruct storeFPR( memory mem, regFPR1 src) %{ 6525 predicate(UseSSE==0); 6526 match(Set mem (StoreF mem src)); 6527 6528 ins_cost(100); 6529 format %{ "FST_S $mem,$src" %} 6530 opcode(0xD9); /* D9 /2 */ 6531 ins_encode( enc_FPR_store(mem,src) ); 6532 ins_pipe( fpu_mem_reg ); 6533 %} 6534 6535 // Store Float does rounding on x86 6536 instruct storeFPR_rounded( memory mem, regFPR1 src) %{ 6537 predicate(UseSSE==0); 6538 match(Set mem (StoreF mem (RoundFloat src))); 6539 6540 ins_cost(100); 6541 format %{ "FST_S $mem,$src\t# round" %} 6542 opcode(0xD9); /* D9 /2 */ 6543 ins_encode( enc_FPR_store(mem,src) ); 6544 ins_pipe( fpu_mem_reg ); 6545 %} 6546 6547 // Store Float does rounding on x86 6548 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{ 6549 predicate(UseSSE<=1); 6550 match(Set mem (StoreF mem (ConvD2F src))); 6551 6552 ins_cost(100); 6553 format %{ "FST_S $mem,$src\t# D-round" %} 6554 opcode(0xD9); /* D9 /2 */ 6555 ins_encode( enc_FPR_store(mem,src) ); 6556 ins_pipe( fpu_mem_reg ); 6557 %} 6558 6559 // Store immediate Float value (it is faster than store from FPU register) 6560 // The instruction usage is 
guarded by predicate in operand immFPR(). 6561 instruct storeFPR_imm( memory mem, immFPR src) %{ 6562 match(Set mem (StoreF mem src)); 6563 6564 ins_cost(50); 6565 format %{ "MOV $mem,$src\t# store float" %} 6566 opcode(0xC7); /* C7 /0 */ 6567 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src )); 6568 ins_pipe( ialu_mem_imm ); 6569 %} 6570 6571 // Store immediate Float value (it is faster than store from XMM register) 6572 // The instruction usage is guarded by predicate in operand immF(). 6573 instruct storeF_imm( memory mem, immF src) %{ 6574 match(Set mem (StoreF mem src)); 6575 6576 ins_cost(50); 6577 format %{ "MOV $mem,$src\t# store float" %} 6578 opcode(0xC7); /* C7 /0 */ 6579 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src )); 6580 ins_pipe( ialu_mem_imm ); 6581 %} 6582 6583 // Store Integer to stack slot 6584 instruct storeSSI(stackSlotI dst, rRegI src) %{ 6585 match(Set dst src); 6586 6587 ins_cost(100); 6588 format %{ "MOV $dst,$src" %} 6589 opcode(0x89); 6590 ins_encode( OpcPRegSS( dst, src ) ); 6591 ins_pipe( ialu_mem_reg ); 6592 %} 6593 6594 // Store Integer to stack slot 6595 instruct storeSSP(stackSlotP dst, eRegP src) %{ 6596 match(Set dst src); 6597 6598 ins_cost(100); 6599 format %{ "MOV $dst,$src" %} 6600 opcode(0x89); 6601 ins_encode( OpcPRegSS( dst, src ) ); 6602 ins_pipe( ialu_mem_reg ); 6603 %} 6604 6605 // Store Long to stack slot 6606 instruct storeSSL(stackSlotL dst, eRegL src) %{ 6607 match(Set dst src); 6608 6609 ins_cost(200); 6610 format %{ "MOV $dst,$src.lo\n\t" 6611 "MOV $dst+4,$src.hi" %} 6612 opcode(0x89, 0x89); 6613 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 6614 ins_pipe( ialu_mem_long_reg ); 6615 %} 6616 6617 //----------MemBar Instructions----------------------------------------------- 6618 // Memory barrier flavors 6619 6620 instruct membar_acquire() %{ 6621 match(MemBarAcquire); 6622 match(LoadFence); 6623 ins_cost(400); 6624 6625 size(0); 6626 format %{ "MEMBAR-acquire ! (empty encoding)" %} 6627 ins_encode(); 6628 ins_pipe(empty); 6629 %} 6630 6631 instruct membar_acquire_lock() %{ 6632 match(MemBarAcquireLock); 6633 ins_cost(0); 6634 6635 size(0); 6636 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %} 6637 ins_encode( ); 6638 ins_pipe(empty); 6639 %} 6640 6641 instruct membar_release() %{ 6642 match(MemBarRelease); 6643 match(StoreFence); 6644 ins_cost(400); 6645 6646 size(0); 6647 format %{ "MEMBAR-release ! (empty encoding)" %} 6648 ins_encode( ); 6649 ins_pipe(empty); 6650 %} 6651 6652 instruct membar_release_lock() %{ 6653 match(MemBarReleaseLock); 6654 ins_cost(0); 6655 6656 size(0); 6657 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %} 6658 ins_encode( ); 6659 ins_pipe(empty); 6660 %} 6661 6662 instruct membar_volatile(eFlagsReg cr) %{ 6663 match(MemBarVolatile); 6664 effect(KILL cr); 6665 ins_cost(400); 6666 6667 format %{ 6668 $$template 6669 if (os::is_MP()) { 6670 $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile" 6671 } else { 6672 $$emit$$"MEMBAR-volatile ! 
(empty encoding)" 6673 } 6674 %} 6675 ins_encode %{ 6676 __ membar(Assembler::StoreLoad); 6677 %} 6678 ins_pipe(pipe_slow); 6679 %} 6680 6681 instruct unnecessary_membar_volatile() %{ 6682 match(MemBarVolatile); 6683 predicate(Matcher::post_store_load_barrier(n)); 6684 ins_cost(0); 6685 6686 size(0); 6687 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %} 6688 ins_encode( ); 6689 ins_pipe(empty); 6690 %} 6691 6692 instruct membar_storestore() %{ 6693 match(MemBarStoreStore); 6694 ins_cost(0); 6695 6696 size(0); 6697 format %{ "MEMBAR-storestore (empty encoding)" %} 6698 ins_encode( ); 6699 ins_pipe(empty); 6700 %} 6701 6702 //----------Move Instructions-------------------------------------------------- 6703 instruct castX2P(eAXRegP dst, eAXRegI src) %{ 6704 match(Set dst (CastX2P src)); 6705 format %{ "# X2P $dst, $src" %} 6706 ins_encode( /*empty encoding*/ ); 6707 ins_cost(0); 6708 ins_pipe(empty); 6709 %} 6710 6711 instruct castP2X(rRegI dst, eRegP src ) %{ 6712 match(Set dst (CastP2X src)); 6713 ins_cost(50); 6714 format %{ "MOV $dst, $src\t# CastP2X" %} 6715 ins_encode( enc_Copy( dst, src) ); 6716 ins_pipe( ialu_reg_reg ); 6717 %} 6718 6719 //----------Conditional Move--------------------------------------------------- 6720 // Conditional move 6721 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{ 6722 predicate(!VM_Version::supports_cmov() ); 6723 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6724 ins_cost(200); 6725 format %{ "J$cop,us skip\t# signed cmove\n\t" 6726 "MOV $dst,$src\n" 6727 "skip:" %} 6728 ins_encode %{ 6729 Label Lskip; 6730 // Invert sense of branch from sense of CMOV 6731 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6732 __ movl($dst$$Register, $src$$Register); 6733 __ bind(Lskip); 6734 %} 6735 ins_pipe( pipe_cmov_reg ); 6736 %} 6737 6738 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{ 6739 predicate(!VM_Version::supports_cmov() ); 6740 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6741 ins_cost(200); 6742 format %{ "J$cop,us skip\t# unsigned cmove\n\t" 6743 "MOV $dst,$src\n" 6744 "skip:" %} 6745 ins_encode %{ 6746 Label Lskip; 6747 // Invert sense of branch from sense of CMOV 6748 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6749 __ movl($dst$$Register, $src$$Register); 6750 __ bind(Lskip); 6751 %} 6752 ins_pipe( pipe_cmov_reg ); 6753 %} 6754 6755 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{ 6756 predicate(VM_Version::supports_cmov() ); 6757 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6758 ins_cost(200); 6759 format %{ "CMOV$cop $dst,$src" %} 6760 opcode(0x0F,0x40); 6761 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6762 ins_pipe( pipe_cmov_reg ); 6763 %} 6764 6765 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{ 6766 predicate(VM_Version::supports_cmov() ); 6767 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6768 ins_cost(200); 6769 format %{ "CMOV$cop $dst,$src" %} 6770 opcode(0x0F,0x40); 6771 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6772 ins_pipe( pipe_cmov_reg ); 6773 %} 6774 6775 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{ 6776 predicate(VM_Version::supports_cmov() ); 6777 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6778 ins_cost(200); 6779 expand %{ 6780 cmovI_regU(cop, cr, dst, src); 6781 %} 6782 %} 6783 6784 // Conditional move 6785 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{ 6786 
predicate(VM_Version::supports_cmov() ); 6787 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6788 ins_cost(250); 6789 format %{ "CMOV$cop $dst,$src" %} 6790 opcode(0x0F,0x40); 6791 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6792 ins_pipe( pipe_cmov_mem ); 6793 %} 6794 6795 // Conditional move 6796 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{ 6797 predicate(VM_Version::supports_cmov() ); 6798 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6799 ins_cost(250); 6800 format %{ "CMOV$cop $dst,$src" %} 6801 opcode(0x0F,0x40); 6802 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6803 ins_pipe( pipe_cmov_mem ); 6804 %} 6805 6806 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{ 6807 predicate(VM_Version::supports_cmov() ); 6808 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6809 ins_cost(250); 6810 expand %{ 6811 cmovI_memU(cop, cr, dst, src); 6812 %} 6813 %} 6814 6815 // Conditional move 6816 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6817 predicate(VM_Version::supports_cmov() ); 6818 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6819 ins_cost(200); 6820 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6821 opcode(0x0F,0x40); 6822 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6823 ins_pipe( pipe_cmov_reg ); 6824 %} 6825 6826 // Conditional move (non-P6 version) 6827 // Note: a CMoveP is generated for stubs and native wrappers 6828 // regardless of whether we are on a P6, so we 6829 // emulate a cmov here 6830 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6831 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6832 ins_cost(300); 6833 format %{ "Jn$cop skip\n\t" 6834 "MOV $dst,$src\t# pointer\n" 6835 "skip:" %} 6836 opcode(0x8b); 6837 ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src)); 6838 ins_pipe( pipe_cmov_reg ); 6839 %} 6840 6841 // Conditional move 6842 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{ 6843 predicate(VM_Version::supports_cmov() ); 6844 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6845 ins_cost(200); 6846 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6847 opcode(0x0F,0x40); 6848 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6849 ins_pipe( pipe_cmov_reg ); 6850 %} 6851 6852 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{ 6853 predicate(VM_Version::supports_cmov() ); 6854 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6855 ins_cost(200); 6856 expand %{ 6857 cmovP_regU(cop, cr, dst, src); 6858 %} 6859 %} 6860 6861 // DISABLED: Requires the ADLC to emit a bottom_type call that 6862 // correctly meets the two pointer arguments; one is an incoming 6863 // register but the other is a memory operand. ALSO appears to 6864 // be buggy with implicit null checks. 
6865 // 6866 //// Conditional move 6867 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{ 6868 // predicate(VM_Version::supports_cmov() ); 6869 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6870 // ins_cost(250); 6871 // format %{ "CMOV$cop $dst,$src\t# ptr" %} 6872 // opcode(0x0F,0x40); 6873 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6874 // ins_pipe( pipe_cmov_mem ); 6875 //%} 6876 // 6877 //// Conditional move 6878 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{ 6879 // predicate(VM_Version::supports_cmov() ); 6880 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6881 // ins_cost(250); 6882 // format %{ "CMOV$cop $dst,$src\t# ptr" %} 6883 // opcode(0x0F,0x40); 6884 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6885 // ins_pipe( pipe_cmov_mem ); 6886 //%} 6887 6888 // Conditional move 6889 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{ 6890 predicate(UseSSE<=1); 6891 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6892 ins_cost(200); 6893 format %{ "FCMOV$cop $dst,$src\t# double" %} 6894 opcode(0xDA); 6895 ins_encode( enc_cmov_dpr(cop,src) ); 6896 ins_pipe( pipe_cmovDPR_reg ); 6897 %} 6898 6899 // Conditional move 6900 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{ 6901 predicate(UseSSE==0); 6902 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6903 ins_cost(200); 6904 format %{ "FCMOV$cop $dst,$src\t# float" %} 6905 opcode(0xDA); 6906 ins_encode( enc_cmov_dpr(cop,src) ); 6907 ins_pipe( pipe_cmovDPR_reg ); 6908 %} 6909 6910 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6911 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ 6912 predicate(UseSSE<=1); 6913 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6914 ins_cost(200); 6915 format %{ "Jn$cop skip\n\t" 6916 "MOV $dst,$src\t# double\n" 6917 "skip:" %} 6918 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6919 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); 6920 ins_pipe( pipe_cmovDPR_reg ); 6921 %} 6922 6923 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 
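// [Illustrative comment only.] For signed FP compares the rules below fall
// back to a short branch: the emitted branch condition is the inverse of the
// requested CMOV condition, so the move is skipped exactly when the predicate
// is false. Rough C++ model of the "Jn$cop skip; MOV; skip:" pattern
// (hypothetical helper name, illustration only):
//
//   static double cmove_emulated(bool cond, double dst, double src) {
//     if (cond) {   // compiled form: jump over the move when !cond
//       dst = src;
//     }
//     return dst;
//   }
//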
6924 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ 6925 predicate(UseSSE==0); 6926 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6927 ins_cost(200); 6928 format %{ "Jn$cop skip\n\t" 6929 "MOV $dst,$src\t# float\n" 6930 "skip:" %} 6931 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6932 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); 6933 ins_pipe( pipe_cmovDPR_reg ); 6934 %} 6935 6936 // No CMOVE with SSE/SSE2 6937 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 6938 predicate (UseSSE>=1); 6939 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6940 ins_cost(200); 6941 format %{ "Jn$cop skip\n\t" 6942 "MOVSS $dst,$src\t# float\n" 6943 "skip:" %} 6944 ins_encode %{ 6945 Label skip; 6946 // Invert sense of branch from sense of CMOV 6947 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6948 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6949 __ bind(skip); 6950 %} 6951 ins_pipe( pipe_slow ); 6952 %} 6953 6954 // No CMOVE with SSE/SSE2 6955 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 6956 predicate (UseSSE>=2); 6957 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6958 ins_cost(200); 6959 format %{ "Jn$cop skip\n\t" 6960 "MOVSD $dst,$src\t# float\n" 6961 "skip:" %} 6962 ins_encode %{ 6963 Label skip; 6964 // Invert sense of branch from sense of CMOV 6965 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6966 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6967 __ bind(skip); 6968 %} 6969 ins_pipe( pipe_slow ); 6970 %} 6971 6972 // unsigned version 6973 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ 6974 predicate (UseSSE>=1); 6975 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6976 ins_cost(200); 6977 format %{ "Jn$cop skip\n\t" 6978 "MOVSS $dst,$src\t# float\n" 6979 "skip:" %} 6980 ins_encode %{ 6981 Label skip; 6982 // Invert sense of branch from sense of CMOV 6983 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6984 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6985 __ bind(skip); 6986 %} 6987 ins_pipe( pipe_slow ); 6988 %} 6989 6990 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ 6991 predicate (UseSSE>=1); 6992 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6993 ins_cost(200); 6994 expand %{ 6995 fcmovF_regU(cop, cr, dst, src); 6996 %} 6997 %} 6998 6999 // unsigned version 7000 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ 7001 predicate (UseSSE>=2); 7002 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7003 ins_cost(200); 7004 format %{ "Jn$cop skip\n\t" 7005 "MOVSD $dst,$src\t# float\n" 7006 "skip:" %} 7007 ins_encode %{ 7008 Label skip; 7009 // Invert sense of branch from sense of CMOV 7010 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 7011 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 7012 __ bind(skip); 7013 %} 7014 ins_pipe( pipe_slow ); 7015 %} 7016 7017 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ 7018 predicate (UseSSE>=2); 7019 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7020 ins_cost(200); 7021 expand %{ 7022 fcmovD_regU(cop, cr, dst, src); 7023 %} 7024 %} 7025 7026 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{ 7027 predicate(VM_Version::supports_cmov() ); 7028 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7029 ins_cost(200); 7030 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 7031 "CMOV$cop $dst.hi,$src.hi" %} 7032 
opcode(0x0F,0x40); 7033 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 7034 ins_pipe( pipe_cmov_reg_long ); 7035 %} 7036 7037 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{ 7038 predicate(VM_Version::supports_cmov() ); 7039 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7040 ins_cost(200); 7041 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 7042 "CMOV$cop $dst.hi,$src.hi" %} 7043 opcode(0x0F,0x40); 7044 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 7045 ins_pipe( pipe_cmov_reg_long ); 7046 %} 7047 7048 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{ 7049 predicate(VM_Version::supports_cmov() ); 7050 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7051 ins_cost(200); 7052 expand %{ 7053 cmovL_regU(cop, cr, dst, src); 7054 %} 7055 %} 7056 7057 //----------Arithmetic Instructions-------------------------------------------- 7058 //----------Addition Instructions---------------------------------------------- 7059 7060 // Integer Addition Instructions 7061 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7062 match(Set dst (AddI dst src)); 7063 effect(KILL cr); 7064 7065 size(2); 7066 format %{ "ADD $dst,$src" %} 7067 opcode(0x03); 7068 ins_encode( OpcP, RegReg( dst, src) ); 7069 ins_pipe( ialu_reg_reg ); 7070 %} 7071 7072 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7073 match(Set dst (AddI dst src)); 7074 effect(KILL cr); 7075 7076 format %{ "ADD $dst,$src" %} 7077 opcode(0x81, 0x00); /* /0 id */ 7078 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7079 ins_pipe( ialu_reg ); 7080 %} 7081 7082 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{ 7083 predicate(UseIncDec); 7084 match(Set dst (AddI dst src)); 7085 effect(KILL cr); 7086 7087 size(1); 7088 format %{ "INC $dst" %} 7089 opcode(0x40); /* */ 7090 ins_encode( Opc_plus( primary, dst ) ); 7091 ins_pipe( ialu_reg ); 7092 %} 7093 7094 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ 7095 match(Set dst (AddI src0 src1)); 7096 ins_cost(110); 7097 7098 format %{ "LEA $dst,[$src0 + $src1]" %} 7099 opcode(0x8D); /* 0x8D /r */ 7100 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7101 ins_pipe( ialu_reg_reg ); 7102 %} 7103 7104 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ 7105 match(Set dst (AddP src0 src1)); 7106 ins_cost(110); 7107 7108 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} 7109 opcode(0x8D); /* 0x8D /r */ 7110 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7111 ins_pipe( ialu_reg_reg ); 7112 %} 7113 7114 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ 7115 predicate(UseIncDec); 7116 match(Set dst (AddI dst src)); 7117 effect(KILL cr); 7118 7119 size(1); 7120 format %{ "DEC $dst" %} 7121 opcode(0x48); /* */ 7122 ins_encode( Opc_plus( primary, dst ) ); 7123 ins_pipe( ialu_reg ); 7124 %} 7125 7126 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ 7127 match(Set dst (AddP dst src)); 7128 effect(KILL cr); 7129 7130 size(2); 7131 format %{ "ADD $dst,$src" %} 7132 opcode(0x03); 7133 ins_encode( OpcP, RegReg( dst, src) ); 7134 ins_pipe( ialu_reg_reg ); 7135 %} 7136 7137 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ 7138 match(Set dst (AddP dst src)); 7139 effect(KILL cr); 7140 7141 format %{ "ADD $dst,$src" %} 7142 opcode(0x81,0x00); /* Opcode 81 /0 id */ 7143 // ins_encode( RegImm( dst, src) ); 7144 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7145 ins_pipe( ialu_reg ); 7146 %} 7147 
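// [Illustrative comment only.] The next two rules are the two memory forms of
// integer ADD; a C++ reference model of what each computes (hypothetical
// helper, illustration only -- flag effects are not modeled):
//
//   static void add_forms(int& dst, int* mem, int src) {
//     dst  += *mem;   // addI_eReg_mem:  ADD reg,[mem]
//     *mem += src;    // addI_mem_eReg:  ADD [mem],reg  (read-modify-write in memory)
//   }
//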
7148 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7149 match(Set dst (AddI dst (LoadI src))); 7150 effect(KILL cr); 7151 7152 ins_cost(125); 7153 format %{ "ADD $dst,$src" %} 7154 opcode(0x03); 7155 ins_encode( OpcP, RegMem( dst, src) ); 7156 ins_pipe( ialu_reg_mem ); 7157 %} 7158 7159 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7160 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7161 effect(KILL cr); 7162 7163 ins_cost(150); 7164 format %{ "ADD $dst,$src" %} 7165 opcode(0x01); /* Opcode 01 /r */ 7166 ins_encode( OpcP, RegMem( src, dst ) ); 7167 ins_pipe( ialu_mem_reg ); 7168 %} 7169 7170 // Add Memory with Immediate 7171 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7172 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7173 effect(KILL cr); 7174 7175 ins_cost(125); 7176 format %{ "ADD $dst,$src" %} 7177 opcode(0x81); /* Opcode 81 /0 id */ 7178 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7179 ins_pipe( ialu_mem_imm ); 7180 %} 7181 7182 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ 7183 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7184 effect(KILL cr); 7185 7186 ins_cost(125); 7187 format %{ "INC $dst" %} 7188 opcode(0xFF); /* Opcode FF /0 */ 7189 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7190 ins_pipe( ialu_mem_imm ); 7191 %} 7192 7193 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7194 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7195 effect(KILL cr); 7196 7197 ins_cost(125); 7198 format %{ "DEC $dst" %} 7199 opcode(0xFF); /* Opcode FF /1 */ 7200 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7201 ins_pipe( ialu_mem_imm ); 7202 %} 7203 7204 7205 instruct checkCastPP( eRegP dst ) %{ 7206 match(Set dst (CheckCastPP dst)); 7207 7208 size(0); 7209 format %{ "#checkcastPP of $dst" %} 7210 ins_encode( /*empty encoding*/ ); 7211 ins_pipe( empty ); 7212 %} 7213 7214 instruct castPP( eRegP dst ) %{ 7215 match(Set dst (CastPP dst)); 7216 format %{ "#castPP of $dst" %} 7217 ins_encode( /*empty encoding*/ ); 7218 ins_pipe( empty ); 7219 %} 7220 7221 instruct castII( rRegI dst ) %{ 7222 match(Set dst (CastII dst)); 7223 format %{ "#castII of $dst" %} 7224 ins_encode( /*empty encoding*/ ); 7225 ins_cost(0); 7226 ins_pipe( empty ); 7227 %} 7228 7229 7230 // Load-locked - same as a regular pointer load when used with compare-swap 7231 instruct loadPLocked(eRegP dst, memory mem) %{ 7232 match(Set dst (LoadPLocked mem)); 7233 7234 ins_cost(125); 7235 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7236 opcode(0x8B); 7237 ins_encode( OpcP, RegMem(dst,mem)); 7238 ins_pipe( ialu_reg_mem ); 7239 %} 7240 7241 // Conditional-store of the updated heap-top. 7242 // Used during allocation of the shared heap. 7243 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 7244 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 7245 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 7246 // EAX is killed if there is contention, but then it's also unused. 7247 // In the common case of no contention, EAX holds the new oop address. 7248 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 7249 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 7250 ins_pipe( pipe_cmpxchg ); 7251 %} 7252 7253 // Conditional-store of an int value. 7254 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 
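// [Illustrative comment only.] LOCK CMPXCHG performs the compare and the
// conditional store as one atomic step, and ZF reports the outcome. A C++
// sketch of the semantics the conditional-store and CompareAndSwap rules
// below rely on (hypothetical helper; the real atomicity comes from the
// hardware instruction, not from this code):
//
//   static bool cas_int(volatile int* mem, int expected, int newval) {
//     if (*mem == expected) {   // compare and store happen atomically in CMPXCHG
//       *mem = newval;
//       return true;            // ZF set
//     }
//     return false;             // ZF clear (CMPXCHG also reloads EAX with the current value)
//   }
//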
7255 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ 7256 match(Set cr (StoreIConditional mem (Binary oldval newval))); 7257 effect(KILL oldval); 7258 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} 7259 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); 7260 ins_pipe( pipe_cmpxchg ); 7261 %} 7262 7263 // Conditional-store of a long value. 7264 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. 7265 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7266 match(Set cr (StoreLConditional mem (Binary oldval newval))); 7267 effect(KILL oldval); 7268 format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t" 7269 "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t" 7270 "XCHG EBX,ECX" 7271 %} 7272 ins_encode %{ 7273 // Note: we need to swap rbx, and rcx before and after the 7274 // cmpxchg8 instruction because the instruction uses 7275 // rcx as the high order word of the new value to store but 7276 // our register encoding uses rbx. 7277 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7278 if( os::is_MP() ) 7279 __ lock(); 7280 __ cmpxchg8($mem$$Address); 7281 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7282 %} 7283 ins_pipe( pipe_cmpxchg ); 7284 %} 7285 7286 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7287 7288 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7289 predicate(VM_Version::supports_cx8()); 7290 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7291 effect(KILL cr, KILL oldval); 7292 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7293 "MOV $res,0\n\t" 7294 "JNE,s fail\n\t" 7295 "MOV $res,1\n" 7296 "fail:" %} 7297 ins_encode( enc_cmpxchg8(mem_ptr), 7298 enc_flags_ne_to_boolean(res) ); 7299 ins_pipe( pipe_cmpxchg ); 7300 %} 7301 7302 instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7303 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 7304 effect(KILL cr, KILL oldval); 7305 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7306 "MOV $res,0\n\t" 7307 "JNE,s fail\n\t" 7308 "MOV $res,1\n" 7309 "fail:" %} 7310 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7311 ins_pipe( pipe_cmpxchg ); 7312 %} 7313 7314 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7315 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 7316 effect(KILL cr, KILL oldval); 7317 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7318 "MOV $res,0\n\t" 7319 "JNE,s fail\n\t" 7320 "MOV $res,1\n" 7321 "fail:" %} 7322 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7323 ins_pipe( pipe_cmpxchg ); 7324 %} 7325 7326 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7327 predicate(n->as_LoadStore()->result_not_used()); 7328 match(Set dummy (GetAndAddI mem add)); 7329 effect(KILL cr); 7330 format %{ "ADDL [$mem],$add" %} 7331 ins_encode %{ 7332 if (os::is_MP()) { __ lock(); } 7333 __ addl($mem$$Address, $add$$constant); 7334 %} 7335 ins_pipe( pipe_cmpxchg ); 7336 %} 7337 7338 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ 7339 match(Set newval 
(GetAndAddI mem newval)); 7340 effect(KILL cr); 7341 format %{ "XADDL [$mem],$newval" %} 7342 ins_encode %{ 7343 if (os::is_MP()) { __ lock(); } 7344 __ xaddl($mem$$Address, $newval$$Register); 7345 %} 7346 ins_pipe( pipe_cmpxchg ); 7347 %} 7348 7349 instruct xchgI( memory mem, rRegI newval) %{ 7350 match(Set newval (GetAndSetI mem newval)); 7351 format %{ "XCHGL $newval,[$mem]" %} 7352 ins_encode %{ 7353 __ xchgl($newval$$Register, $mem$$Address); 7354 %} 7355 ins_pipe( pipe_cmpxchg ); 7356 %} 7357 7358 instruct xchgP( memory mem, pRegP newval) %{ 7359 match(Set newval (GetAndSetP mem newval)); 7360 format %{ "XCHGL $newval,[$mem]" %} 7361 ins_encode %{ 7362 __ xchgl($newval$$Register, $mem$$Address); 7363 %} 7364 ins_pipe( pipe_cmpxchg ); 7365 %} 7366 7367 //----------Subtraction Instructions------------------------------------------- 7368 7369 // Integer Subtraction Instructions 7370 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7371 match(Set dst (SubI dst src)); 7372 effect(KILL cr); 7373 7374 size(2); 7375 format %{ "SUB $dst,$src" %} 7376 opcode(0x2B); 7377 ins_encode( OpcP, RegReg( dst, src) ); 7378 ins_pipe( ialu_reg_reg ); 7379 %} 7380 7381 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7382 match(Set dst (SubI dst src)); 7383 effect(KILL cr); 7384 7385 format %{ "SUB $dst,$src" %} 7386 opcode(0x81,0x05); /* Opcode 81 /5 */ 7387 // ins_encode( RegImm( dst, src) ); 7388 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7389 ins_pipe( ialu_reg ); 7390 %} 7391 7392 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7393 match(Set dst (SubI dst (LoadI src))); 7394 effect(KILL cr); 7395 7396 ins_cost(125); 7397 format %{ "SUB $dst,$src" %} 7398 opcode(0x2B); 7399 ins_encode( OpcP, RegMem( dst, src) ); 7400 ins_pipe( ialu_reg_mem ); 7401 %} 7402 7403 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7404 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 7405 effect(KILL cr); 7406 7407 ins_cost(150); 7408 format %{ "SUB $dst,$src" %} 7409 opcode(0x29); /* Opcode 29 /r */ 7410 ins_encode( OpcP, RegMem( src, dst ) ); 7411 ins_pipe( ialu_mem_reg ); 7412 %} 7413 7414 // Subtract from a pointer 7415 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{ 7416 match(Set dst (AddP dst (SubI zero src))); 7417 effect(KILL cr); 7418 7419 size(2); 7420 format %{ "SUB $dst,$src" %} 7421 opcode(0x2B); 7422 ins_encode( OpcP, RegReg( dst, src) ); 7423 ins_pipe( ialu_reg_reg ); 7424 %} 7425 7426 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{ 7427 match(Set dst (SubI zero dst)); 7428 effect(KILL cr); 7429 7430 size(2); 7431 format %{ "NEG $dst" %} 7432 opcode(0xF7,0x03); // Opcode F7 /3 7433 ins_encode( OpcP, RegOpc( dst ) ); 7434 ins_pipe( ialu_reg ); 7435 %} 7436 7437 //----------Multiplication/Division Instructions------------------------------- 7438 // Integer Multiplication Instructions 7439 // Multiply Register 7440 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7441 match(Set dst (MulI dst src)); 7442 effect(KILL cr); 7443 7444 size(3); 7445 ins_cost(300); 7446 format %{ "IMUL $dst,$src" %} 7447 opcode(0xAF, 0x0F); 7448 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 7449 ins_pipe( ialu_reg_reg_alu0 ); 7450 %} 7451 7452 // Multiply 32-bit Immediate 7453 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ 7454 match(Set dst (MulI src imm)); 7455 effect(KILL cr); 7456 7457 ins_cost(300); 7458 format %{ "IMUL $dst,$src,$imm" %} 7459 opcode(0x69); /* 69 /r id */ 7460 ins_encode( OpcSE(imm), 
RegReg( dst, src ), Con8or32( imm ) ); 7461 ins_pipe( ialu_reg_reg_alu0 ); 7462 %} 7463 7464 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 7465 match(Set dst src); 7466 effect(KILL cr); 7467 7468 // Note that this is artificially increased to make it more expensive than loadConL 7469 ins_cost(250); 7470 format %{ "MOV EAX,$src\t// low word only" %} 7471 opcode(0xB8); 7472 ins_encode( LdImmL_Lo(dst, src) ); 7473 ins_pipe( ialu_reg_fat ); 7474 %} 7475 7476 // Multiply by 32-bit Immediate, taking the shifted high order results 7477 // (special case for shift by 32) 7478 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 7479 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7480 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7481 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7482 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7483 effect(USE src1, KILL cr); 7484 7485 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7486 ins_cost(0*100 + 1*400 - 150); 7487 format %{ "IMUL EDX:EAX,$src1" %} 7488 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7489 ins_pipe( pipe_slow ); 7490 %} 7491 7492 // Multiply by 32-bit Immediate, taking the shifted high order results 7493 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 7494 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7495 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7496 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7497 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7498 effect(USE src1, KILL cr); 7499 7500 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7501 ins_cost(1*100 + 1*400 - 150); 7502 format %{ "IMUL EDX:EAX,$src1\n\t" 7503 "SAR EDX,$cnt-32" %} 7504 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7505 ins_pipe( pipe_slow ); 7506 %} 7507 7508 // Multiply Memory 32-bit Immediate 7509 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ 7510 match(Set dst (MulI (LoadI src) imm)); 7511 effect(KILL cr); 7512 7513 ins_cost(300); 7514 format %{ "IMUL $dst,$src,$imm" %} 7515 opcode(0x69); /* 69 /r id */ 7516 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); 7517 ins_pipe( ialu_reg_mem_alu0 ); 7518 %} 7519 7520 // Multiply Memory 7521 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ 7522 match(Set dst (MulI dst (LoadI src))); 7523 effect(KILL cr); 7524 7525 ins_cost(350); 7526 format %{ "IMUL $dst,$src" %} 7527 opcode(0xAF, 0x0F); 7528 ins_encode( OpcS, OpcP, RegMem( dst, src) ); 7529 ins_pipe( ialu_reg_mem_alu0 ); 7530 %} 7531 7532 // Multiply Register Int to Long 7533 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 7534 // Basic Idea: long = (long)int * (long)int 7535 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 7536 effect(DEF dst, USE src, USE src1, KILL flags); 7537 7538 ins_cost(300); 7539 format %{ "IMUL $dst,$src1" %} 7540 7541 ins_encode( long_int_multiply( dst, src1 ) ); 7542 ins_pipe( ialu_reg_reg_alu0 ); 7543 %} 7544 7545 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 
7546 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 7547 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 7548 effect(KILL flags); 7549 7550 ins_cost(300); 7551 format %{ "MUL $dst,$src1" %} 7552 7553 ins_encode( long_uint_multiply(dst, src1) ); 7554 ins_pipe( ialu_reg_reg_alu0 ); 7555 %} 7556 7557 // Multiply Register Long 7558 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7559 match(Set dst (MulL dst src)); 7560 effect(KILL cr, TEMP tmp); 7561 ins_cost(4*100+3*400); 7562 // Basic idea: lo(result) = lo(x_lo * y_lo) 7563 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 7564 format %{ "MOV $tmp,$src.lo\n\t" 7565 "IMUL $tmp,EDX\n\t" 7566 "MOV EDX,$src.hi\n\t" 7567 "IMUL EDX,EAX\n\t" 7568 "ADD $tmp,EDX\n\t" 7569 "MUL EDX:EAX,$src.lo\n\t" 7570 "ADD EDX,$tmp" %} 7571 ins_encode( long_multiply( dst, src, tmp ) ); 7572 ins_pipe( pipe_slow ); 7573 %} 7574 7575 // Multiply Register Long where the left operand's high 32 bits are zero 7576 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7577 predicate(is_operand_hi32_zero(n->in(1))); 7578 match(Set dst (MulL dst src)); 7579 effect(KILL cr, TEMP tmp); 7580 ins_cost(2*100+2*400); 7581 // Basic idea: lo(result) = lo(x_lo * y_lo) 7582 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 7583 format %{ "MOV $tmp,$src.hi\n\t" 7584 "IMUL $tmp,EAX\n\t" 7585 "MUL EDX:EAX,$src.lo\n\t" 7586 "ADD EDX,$tmp" %} 7587 ins_encode %{ 7588 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 7589 __ imull($tmp$$Register, rax); 7590 __ mull($src$$Register); 7591 __ addl(rdx, $tmp$$Register); 7592 %} 7593 ins_pipe( pipe_slow ); 7594 %} 7595 7596 // Multiply Register Long where the right operand's high 32 bits are zero 7597 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7598 predicate(is_operand_hi32_zero(n->in(2))); 7599 match(Set dst (MulL dst src)); 7600 effect(KILL cr, TEMP tmp); 7601 ins_cost(2*100+2*400); 7602 // Basic idea: lo(result) = lo(x_lo * y_lo) 7603 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 7604 format %{ "MOV $tmp,$src.lo\n\t" 7605 "IMUL $tmp,EDX\n\t" 7606 "MUL EDX:EAX,$src.lo\n\t" 7607 "ADD EDX,$tmp" %} 7608 ins_encode %{ 7609 __ movl($tmp$$Register, $src$$Register); 7610 __ imull($tmp$$Register, rdx); 7611 __ mull($src$$Register); 7612 __ addl(rdx, $tmp$$Register); 7613 %} 7614 ins_pipe( pipe_slow ); 7615 %} 7616 7617 // Multiply Register Long where the left and the right operands' high 32 bits are zero 7618 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ 7619 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); 7620 match(Set dst (MulL dst src)); 7621 effect(KILL cr); 7622 ins_cost(1*400); 7623 // Basic idea: lo(result) = lo(x_lo * y_lo) 7624 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 7625 format %{ "MUL EDX:EAX,$src.lo\n\t" %} 7626 ins_encode %{ 7627 __ mull($src$$Register); 7628 %} 7629 ins_pipe( pipe_slow ); 7630 %} 7631 7632 // Multiply Register Long by small constant 7633 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ 7634 match(Set dst (MulL dst src)); 7635 effect(KILL cr, TEMP tmp); 7636 ins_cost(2*100+2*400); 7637 size(12); 7638 // Basic idea: lo(result) = lo(src * EAX) 7639 // hi(result) = hi(src * EAX) + lo(src * EDX) 7640 format %{ "IMUL $tmp,EDX,$src\n\t" 7641 "MOV 
EDX,$src\n\t" 7642 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 7643 "ADD EDX,$tmp" %} 7644 ins_encode( long_multiply_con( dst, src, tmp ) ); 7645 ins_pipe( pipe_slow ); 7646 %} 7647 7648 // Integer DIV with Register 7649 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7650 match(Set rax (DivI rax div)); 7651 effect(KILL rdx, KILL cr); 7652 size(26); 7653 ins_cost(30*100+10*100); 7654 format %{ "CMP EAX,0x80000000\n\t" 7655 "JNE,s normal\n\t" 7656 "XOR EDX,EDX\n\t" 7657 "CMP ECX,-1\n\t" 7658 "JE,s done\n" 7659 "normal: CDQ\n\t" 7660 "IDIV $div\n\t" 7661 "done:" %} 7662 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7663 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7664 ins_pipe( ialu_reg_reg_alu0 ); 7665 %} 7666 7667 // Divide Register Long 7668 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ 7669 match(Set dst (DivL src1 src2)); 7670 effect( KILL cr, KILL cx, KILL bx ); 7671 ins_cost(10000); 7672 format %{ "PUSH $src1.hi\n\t" 7673 "PUSH $src1.lo\n\t" 7674 "PUSH $src2.hi\n\t" 7675 "PUSH $src2.lo\n\t" 7676 "CALL SharedRuntime::ldiv\n\t" 7677 "ADD ESP,16" %} 7678 ins_encode( long_div(src1,src2) ); 7679 ins_pipe( pipe_slow ); 7680 %} 7681 7682 // Integer DIVMOD with Register, both quotient and mod results 7683 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7684 match(DivModI rax div); 7685 effect(KILL cr); 7686 size(26); 7687 ins_cost(30*100+10*100); 7688 format %{ "CMP EAX,0x80000000\n\t" 7689 "JNE,s normal\n\t" 7690 "XOR EDX,EDX\n\t" 7691 "CMP ECX,-1\n\t" 7692 "JE,s done\n" 7693 "normal: CDQ\n\t" 7694 "IDIV $div\n\t" 7695 "done:" %} 7696 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7697 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7698 ins_pipe( pipe_slow ); 7699 %} 7700 7701 // Integer MOD with Register 7702 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ 7703 match(Set rdx (ModI rax div)); 7704 effect(KILL rax, KILL cr); 7705 7706 size(26); 7707 ins_cost(300); 7708 format %{ "CDQ\n\t" 7709 "IDIV $div" %} 7710 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7711 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7712 ins_pipe( ialu_reg_reg_alu0 ); 7713 %} 7714 7715 // Remainder Register Long 7716 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ 7717 match(Set dst (ModL src1 src2)); 7718 effect( KILL cr, KILL cx, KILL bx ); 7719 ins_cost(10000); 7720 format %{ "PUSH $src1.hi\n\t" 7721 "PUSH $src1.lo\n\t" 7722 "PUSH $src2.hi\n\t" 7723 "PUSH $src2.lo\n\t" 7724 "CALL SharedRuntime::lrem\n\t" 7725 "ADD ESP,16" %} 7726 ins_encode( long_mod(src1,src2) ); 7727 ins_pipe( pipe_slow ); 7728 %} 7729 7730 // Divide Register Long (no special case since divisor != -1) 7731 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7732 match(Set dst (DivL dst imm)); 7733 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7734 ins_cost(1000); 7735 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" 7736 "XOR $tmp2,$tmp2\n\t" 7737 "CMP $tmp,EDX\n\t" 7738 "JA,s fast\n\t" 7739 "MOV $tmp2,EAX\n\t" 7740 "MOV EAX,EDX\n\t" 7741 "MOV EDX,0\n\t" 7742 "JLE,s pos\n\t" 7743 "LNEG EAX : $tmp2\n\t" 7744 "DIV $tmp # unsigned division\n\t" 7745 "XCHG EAX,$tmp2\n\t" 7746 "DIV $tmp\n\t" 7747 "LNEG $tmp2 : EAX\n\t" 7748 "JMP,s done\n" 7749 "pos:\n\t" 7750 "DIV $tmp\n\t" 7751 "XCHG EAX,$tmp2\n" 7752 "fast:\n\t" 7753 "DIV $tmp\n" 7754 "done:\n\t" 7755 "MOV EDX,$tmp2\n\t" 7756 "NEG EDX:EAX # if $imm < 0" %} 7757 ins_encode %{ 7758 int con = 
(int)$imm$$constant; 7759 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7760 int pcon = (con > 0) ? con : -con; 7761 Label Lfast, Lpos, Ldone; 7762 7763 __ movl($tmp$$Register, pcon); 7764 __ xorl($tmp2$$Register,$tmp2$$Register); 7765 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7766 __ jccb(Assembler::above, Lfast); // result fits into 32 bit 7767 7768 __ movl($tmp2$$Register, $dst$$Register); // save 7769 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7770 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7771 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7772 7773 // Negative dividend. 7774 // convert value to positive to use unsigned division 7775 __ lneg($dst$$Register, $tmp2$$Register); 7776 __ divl($tmp$$Register); 7777 __ xchgl($dst$$Register, $tmp2$$Register); 7778 __ divl($tmp$$Register); 7779 // revert result back to negative 7780 __ lneg($tmp2$$Register, $dst$$Register); 7781 __ jmpb(Ldone); 7782 7783 __ bind(Lpos); 7784 __ divl($tmp$$Register); // Use unsigned division 7785 __ xchgl($dst$$Register, $tmp2$$Register); 7786 // Fallthrow for final divide, tmp2 has 32 bit hi result 7787 7788 __ bind(Lfast); 7789 // fast path: src is positive 7790 __ divl($tmp$$Register); // Use unsigned division 7791 7792 __ bind(Ldone); 7793 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7794 if (con < 0) { 7795 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7796 } 7797 %} 7798 ins_pipe( pipe_slow ); 7799 %} 7800 7801 // Remainder Register Long (remainder fit into 32 bits) 7802 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7803 match(Set dst (ModL dst imm)); 7804 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7805 ins_cost(1000); 7806 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7807 "CMP $tmp,EDX\n\t" 7808 "JA,s fast\n\t" 7809 "MOV $tmp2,EAX\n\t" 7810 "MOV EAX,EDX\n\t" 7811 "MOV EDX,0\n\t" 7812 "JLE,s pos\n\t" 7813 "LNEG EAX : $tmp2\n\t" 7814 "DIV $tmp # unsigned division\n\t" 7815 "MOV EAX,$tmp2\n\t" 7816 "DIV $tmp\n\t" 7817 "NEG EDX\n\t" 7818 "JMP,s done\n" 7819 "pos:\n\t" 7820 "DIV $tmp\n\t" 7821 "MOV EAX,$tmp2\n" 7822 "fast:\n\t" 7823 "DIV $tmp\n" 7824 "done:\n\t" 7825 "MOV EAX,EDX\n\t" 7826 "SAR EDX,31\n\t" %} 7827 ins_encode %{ 7828 int con = (int)$imm$$constant; 7829 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7830 int pcon = (con > 0) ? con : -con; 7831 Label Lfast, Lpos, Ldone; 7832 7833 __ movl($tmp$$Register, pcon); 7834 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7835 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7836 7837 __ movl($tmp2$$Register, $dst$$Register); // save 7838 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7839 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7840 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7841 7842 // Negative dividend. 
7843 // convert value to positive to use unsigned division 7844 __ lneg($dst$$Register, $tmp2$$Register); 7845 __ divl($tmp$$Register); 7846 __ movl($dst$$Register, $tmp2$$Register); 7847 __ divl($tmp$$Register); 7848 // revert remainder back to negative 7849 __ negl(HIGH_FROM_LOW($dst$$Register)); 7850 __ jmpb(Ldone); 7851 7852 __ bind(Lpos); 7853 __ divl($tmp$$Register); 7854 __ movl($dst$$Register, $tmp2$$Register); 7855 7856 __ bind(Lfast); 7857 // fast path: src is positive 7858 __ divl($tmp$$Register); 7859 7860 __ bind(Ldone); 7861 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7862 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 7863 7864 %} 7865 ins_pipe( pipe_slow ); 7866 %} 7867 7868 // Integer Shift Instructions 7869 // Shift Left by one 7870 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7871 match(Set dst (LShiftI dst shift)); 7872 effect(KILL cr); 7873 7874 size(2); 7875 format %{ "SHL $dst,$shift" %} 7876 opcode(0xD1, 0x4); /* D1 /4 */ 7877 ins_encode( OpcP, RegOpc( dst ) ); 7878 ins_pipe( ialu_reg ); 7879 %} 7880 7881 // Shift Left by 8-bit immediate 7882 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7883 match(Set dst (LShiftI dst shift)); 7884 effect(KILL cr); 7885 7886 size(3); 7887 format %{ "SHL $dst,$shift" %} 7888 opcode(0xC1, 0x4); /* C1 /4 ib */ 7889 ins_encode( RegOpcImm( dst, shift) ); 7890 ins_pipe( ialu_reg ); 7891 %} 7892 7893 // Shift Left by variable 7894 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7895 match(Set dst (LShiftI dst shift)); 7896 effect(KILL cr); 7897 7898 size(2); 7899 format %{ "SHL $dst,$shift" %} 7900 opcode(0xD3, 0x4); /* D3 /4 */ 7901 ins_encode( OpcP, RegOpc( dst ) ); 7902 ins_pipe( ialu_reg_reg ); 7903 %} 7904 7905 // Arithmetic shift right by one 7906 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7907 match(Set dst (RShiftI dst shift)); 7908 effect(KILL cr); 7909 7910 size(2); 7911 format %{ "SAR $dst,$shift" %} 7912 opcode(0xD1, 0x7); /* D1 /7 */ 7913 ins_encode( OpcP, RegOpc( dst ) ); 7914 ins_pipe( ialu_reg ); 7915 %} 7916 7917 // Arithmetic shift right by one 7918 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{ 7919 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7920 effect(KILL cr); 7921 format %{ "SAR $dst,$shift" %} 7922 opcode(0xD1, 0x7); /* D1 /7 */ 7923 ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 7924 ins_pipe( ialu_mem_imm ); 7925 %} 7926 7927 // Arithmetic Shift Right by 8-bit immediate 7928 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7929 match(Set dst (RShiftI dst shift)); 7930 effect(KILL cr); 7931 7932 size(3); 7933 format %{ "SAR $dst,$shift" %} 7934 opcode(0xC1, 0x7); /* C1 /7 ib */ 7935 ins_encode( RegOpcImm( dst, shift ) ); 7936 ins_pipe( ialu_mem_imm ); 7937 %} 7938 7939 // Arithmetic Shift Right by 8-bit immediate 7940 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 7941 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7942 effect(KILL cr); 7943 7944 format %{ "SAR $dst,$shift" %} 7945 opcode(0xC1, 0x7); /* C1 /7 ib */ 7946 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 7947 ins_pipe( ialu_mem_imm ); 7948 %} 7949 7950 // Arithmetic Shift Right by variable 7951 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7952 match(Set dst (RShiftI dst shift)); 7953 effect(KILL cr); 7954 7955 size(2); 7956 format %{ "SAR $dst,$shift" %} 7957 opcode(0xD3, 0x7); /* D3 /7 */ 7958 ins_encode( OpcP, RegOpc( dst ) ); 7959 ins_pipe( 
ialu_reg_reg ); 7960 %} 7961 7962 // Logical shift right by one 7963 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7964 match(Set dst (URShiftI dst shift)); 7965 effect(KILL cr); 7966 7967 size(2); 7968 format %{ "SHR $dst,$shift" %} 7969 opcode(0xD1, 0x5); /* D1 /5 */ 7970 ins_encode( OpcP, RegOpc( dst ) ); 7971 ins_pipe( ialu_reg ); 7972 %} 7973 7974 // Logical Shift Right by 8-bit immediate 7975 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7976 match(Set dst (URShiftI dst shift)); 7977 effect(KILL cr); 7978 7979 size(3); 7980 format %{ "SHR $dst,$shift" %} 7981 opcode(0xC1, 0x5); /* C1 /5 ib */ 7982 ins_encode( RegOpcImm( dst, shift) ); 7983 ins_pipe( ialu_reg ); 7984 %} 7985 7986 7987 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. 7988 // This idiom is used by the compiler for the i2b bytecode. 7989 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ 7990 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); 7991 7992 size(3); 7993 format %{ "MOVSX $dst,$src :8" %} 7994 ins_encode %{ 7995 __ movsbl($dst$$Register, $src$$Register); 7996 %} 7997 ins_pipe(ialu_reg_reg); 7998 %} 7999 8000 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. 8001 // This idiom is used by the compiler the i2s bytecode. 8002 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{ 8003 match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); 8004 8005 size(3); 8006 format %{ "MOVSX $dst,$src :16" %} 8007 ins_encode %{ 8008 __ movswl($dst$$Register, $src$$Register); 8009 %} 8010 ins_pipe(ialu_reg_reg); 8011 %} 8012 8013 8014 // Logical Shift Right by variable 8015 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8016 match(Set dst (URShiftI dst shift)); 8017 effect(KILL cr); 8018 8019 size(2); 8020 format %{ "SHR $dst,$shift" %} 8021 opcode(0xD3, 0x5); /* D3 /5 */ 8022 ins_encode( OpcP, RegOpc( dst ) ); 8023 ins_pipe( ialu_reg_reg ); 8024 %} 8025 8026 8027 //----------Logical Instructions----------------------------------------------- 8028 //----------Integer Logical Instructions--------------------------------------- 8029 // And Instructions 8030 // And Register with Register 8031 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8032 match(Set dst (AndI dst src)); 8033 effect(KILL cr); 8034 8035 size(2); 8036 format %{ "AND $dst,$src" %} 8037 opcode(0x23); 8038 ins_encode( OpcP, RegReg( dst, src) ); 8039 ins_pipe( ialu_reg_reg ); 8040 %} 8041 8042 // And Register with Immediate 8043 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8044 match(Set dst (AndI dst src)); 8045 effect(KILL cr); 8046 8047 format %{ "AND $dst,$src" %} 8048 opcode(0x81,0x04); /* Opcode 81 /4 */ 8049 // ins_encode( RegImm( dst, src) ); 8050 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8051 ins_pipe( ialu_reg ); 8052 %} 8053 8054 // And Register with Memory 8055 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8056 match(Set dst (AndI dst (LoadI src))); 8057 effect(KILL cr); 8058 8059 ins_cost(125); 8060 format %{ "AND $dst,$src" %} 8061 opcode(0x23); 8062 ins_encode( OpcP, RegMem( dst, src) ); 8063 ins_pipe( ialu_reg_mem ); 8064 %} 8065 8066 // And Memory with Register 8067 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8068 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8069 effect(KILL cr); 8070 8071 ins_cost(150); 8072 format %{ "AND $dst,$src" %} 8073 opcode(0x21); /* Opcode 21 /r */ 8074 ins_encode( OpcP, RegMem( src, dst ) ); 8075 ins_pipe( ialu_mem_reg ); 8076 %} 
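// Note (illustrative sketch): the op-to-memory rules in this section all match the same
// ideal shape -- a StoreI whose value is an ALU op over a LoadI of the same address --
// so a hypothetical Java update such as
//
//     a[i] &= mask;     // StoreI a[i] (AndI (LoadI a[i]) mask)   -> andI_mem_eReg / andI_mem_imm
//     a[i] += 1;        // StoreI a[i] (AddI (LoadI a[i]) 1)      -> incI_mem
//
// folds the load, the ALU op and the store into a single x86 read-modify-write
// instruction (AND [mem],reg / INC [mem], ...) at ins_cost(125-150) instead of three
// separate instructions.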
8077 8078 // And Memory with Immediate 8079 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8080 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8081 effect(KILL cr); 8082 8083 ins_cost(125); 8084 format %{ "AND $dst,$src" %} 8085 opcode(0x81, 0x4); /* Opcode 81 /4 id */ 8086 // ins_encode( MemImm( dst, src) ); 8087 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8088 ins_pipe( ialu_mem_imm ); 8089 %} 8090 8091 // BMI1 instructions 8092 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ 8093 match(Set dst (AndI (XorI src1 minus_1) src2)); 8094 predicate(UseBMI1Instructions); 8095 effect(KILL cr); 8096 8097 format %{ "ANDNL $dst, $src1, $src2" %} 8098 8099 ins_encode %{ 8100 __ andnl($dst$$Register, $src1$$Register, $src2$$Register); 8101 %} 8102 ins_pipe(ialu_reg); 8103 %} 8104 8105 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ 8106 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) )); 8107 predicate(UseBMI1Instructions); 8108 effect(KILL cr); 8109 8110 ins_cost(125); 8111 format %{ "ANDNL $dst, $src1, $src2" %} 8112 8113 ins_encode %{ 8114 __ andnl($dst$$Register, $src1$$Register, $src2$$Address); 8115 %} 8116 ins_pipe(ialu_reg_mem); 8117 %} 8118 8119 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{ 8120 match(Set dst (AndI (SubI imm_zero src) src)); 8121 predicate(UseBMI1Instructions); 8122 effect(KILL cr); 8123 8124 format %{ "BLSIL $dst, $src" %} 8125 8126 ins_encode %{ 8127 __ blsil($dst$$Register, $src$$Register); 8128 %} 8129 ins_pipe(ialu_reg); 8130 %} 8131 8132 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{ 8133 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); 8134 predicate(UseBMI1Instructions); 8135 effect(KILL cr); 8136 8137 ins_cost(125); 8138 format %{ "BLSIL $dst, $src" %} 8139 8140 ins_encode %{ 8141 __ blsil($dst$$Register, $src$$Address); 8142 %} 8143 ins_pipe(ialu_reg_mem); 8144 %} 8145 8146 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8147 %{ 8148 match(Set dst (XorI (AddI src minus_1) src)); 8149 predicate(UseBMI1Instructions); 8150 effect(KILL cr); 8151 8152 format %{ "BLSMSKL $dst, $src" %} 8153 8154 ins_encode %{ 8155 __ blsmskl($dst$$Register, $src$$Register); 8156 %} 8157 8158 ins_pipe(ialu_reg); 8159 %} 8160 8161 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8162 %{ 8163 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); 8164 predicate(UseBMI1Instructions); 8165 effect(KILL cr); 8166 8167 ins_cost(125); 8168 format %{ "BLSMSKL $dst, $src" %} 8169 8170 ins_encode %{ 8171 __ blsmskl($dst$$Register, $src$$Address); 8172 %} 8173 8174 ins_pipe(ialu_reg_mem); 8175 %} 8176 8177 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8178 %{ 8179 match(Set dst (AndI (AddI src minus_1) src) ); 8180 predicate(UseBMI1Instructions); 8181 effect(KILL cr); 8182 8183 format %{ "BLSRL $dst, $src" %} 8184 8185 ins_encode %{ 8186 __ blsrl($dst$$Register, $src$$Register); 8187 %} 8188 8189 ins_pipe(ialu_reg); 8190 %} 8191 8192 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8193 %{ 8194 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); 8195 predicate(UseBMI1Instructions); 8196 effect(KILL cr); 8197 8198 ins_cost(125); 8199 format %{ "BLSRL $dst, $src" %} 8200 8201 ins_encode %{ 8202 __ blsrl($dst$$Register, 
$src$$Address); 8203 %} 8204 8205 ins_pipe(ialu_reg_mem); 8206 %} 8207 8208 // Or Instructions 8209 // Or Register with Register 8210 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8211 match(Set dst (OrI dst src)); 8212 effect(KILL cr); 8213 8214 size(2); 8215 format %{ "OR $dst,$src" %} 8216 opcode(0x0B); 8217 ins_encode( OpcP, RegReg( dst, src) ); 8218 ins_pipe( ialu_reg_reg ); 8219 %} 8220 8221 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8222 match(Set dst (OrI dst (CastP2X src))); 8223 effect(KILL cr); 8224 8225 size(2); 8226 format %{ "OR $dst,$src" %} 8227 opcode(0x0B); 8228 ins_encode( OpcP, RegReg( dst, src) ); 8229 ins_pipe( ialu_reg_reg ); 8230 %} 8231 8232 8233 // Or Register with Immediate 8234 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8235 match(Set dst (OrI dst src)); 8236 effect(KILL cr); 8237 8238 format %{ "OR $dst,$src" %} 8239 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8240 // ins_encode( RegImm( dst, src) ); 8241 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8242 ins_pipe( ialu_reg ); 8243 %} 8244 8245 // Or Register with Memory 8246 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8247 match(Set dst (OrI dst (LoadI src))); 8248 effect(KILL cr); 8249 8250 ins_cost(125); 8251 format %{ "OR $dst,$src" %} 8252 opcode(0x0B); 8253 ins_encode( OpcP, RegMem( dst, src) ); 8254 ins_pipe( ialu_reg_mem ); 8255 %} 8256 8257 // Or Memory with Register 8258 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8259 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8260 effect(KILL cr); 8261 8262 ins_cost(150); 8263 format %{ "OR $dst,$src" %} 8264 opcode(0x09); /* Opcode 09 /r */ 8265 ins_encode( OpcP, RegMem( src, dst ) ); 8266 ins_pipe( ialu_mem_reg ); 8267 %} 8268 8269 // Or Memory with Immediate 8270 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8271 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8272 effect(KILL cr); 8273 8274 ins_cost(125); 8275 format %{ "OR $dst,$src" %} 8276 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8277 // ins_encode( MemImm( dst, src) ); 8278 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8279 ins_pipe( ialu_mem_imm ); 8280 %} 8281 8282 // ROL/ROR 8283 // ROL expand 8284 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8285 effect(USE_DEF dst, USE shift, KILL cr); 8286 8287 format %{ "ROL $dst, $shift" %} 8288 opcode(0xD1, 0x0); /* Opcode D1 /0 */ 8289 ins_encode( OpcP, RegOpc( dst )); 8290 ins_pipe( ialu_reg ); 8291 %} 8292 8293 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8294 effect(USE_DEF dst, USE shift, KILL cr); 8295 8296 format %{ "ROL $dst, $shift" %} 8297 opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 8298 ins_encode( RegOpcImm(dst, shift) ); 8299 ins_pipe(ialu_reg); 8300 %} 8301 8302 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8303 effect(USE_DEF dst, USE shift, KILL cr); 8304 8305 format %{ "ROL $dst, $shift" %} 8306 opcode(0xD3, 0x0); /* Opcode D3 /0 */ 8307 ins_encode(OpcP, RegOpc(dst)); 8308 ins_pipe( ialu_reg_reg ); 8309 %} 8310 // end of ROL expand 8311 8312 // ROL 32bit by one once 8313 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 8314 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8315 8316 expand %{ 8317 rolI_eReg_imm1(dst, lshift, cr); 8318 %} 8319 %} 8320 8321 // ROL 32bit var by imm8 once 8322 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 8323 predicate( 0 == 
((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8324 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8325 8326 expand %{ 8327 rolI_eReg_imm8(dst, lshift, cr); 8328 %} 8329 %} 8330 8331 // ROL 32bit var by var once 8332 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8333 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 8334 8335 expand %{ 8336 rolI_eReg_CL(dst, shift, cr); 8337 %} 8338 %} 8339 8340 // ROL 32bit var by var once 8341 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8342 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift)))); 8343 8344 expand %{ 8345 rolI_eReg_CL(dst, shift, cr); 8346 %} 8347 %} 8348 8349 // ROR expand 8350 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8351 effect(USE_DEF dst, USE shift, KILL cr); 8352 8353 format %{ "ROR $dst, $shift" %} 8354 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8355 ins_encode( OpcP, RegOpc( dst ) ); 8356 ins_pipe( ialu_reg ); 8357 %} 8358 8359 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8360 effect (USE_DEF dst, USE shift, KILL cr); 8361 8362 format %{ "ROR $dst, $shift" %} 8363 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8364 ins_encode( RegOpcImm(dst, shift) ); 8365 ins_pipe( ialu_reg ); 8366 %} 8367 8368 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8369 effect(USE_DEF dst, USE shift, KILL cr); 8370 8371 format %{ "ROR $dst, $shift" %} 8372 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8373 ins_encode(OpcP, RegOpc(dst)); 8374 ins_pipe( ialu_reg_reg ); 8375 %} 8376 // end of ROR expand 8377 8378 // ROR right once 8379 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8380 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8381 8382 expand %{ 8383 rorI_eReg_imm1(dst, rshift, cr); 8384 %} 8385 %} 8386 8387 // ROR 32bit by immI8 once 8388 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8389 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8390 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8391 8392 expand %{ 8393 rorI_eReg_imm8(dst, rshift, cr); 8394 %} 8395 %} 8396 8397 // ROR 32bit var by var once 8398 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8399 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8400 8401 expand %{ 8402 rorI_eReg_CL(dst, shift, cr); 8403 %} 8404 %} 8405 8406 // ROR 32bit var by var once 8407 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8408 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8409 8410 expand %{ 8411 rorI_eReg_CL(dst, shift, cr); 8412 %} 8413 %} 8414 8415 // Xor Instructions 8416 // Xor Register with Register 8417 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8418 match(Set dst (XorI dst src)); 8419 effect(KILL cr); 8420 8421 size(2); 8422 format %{ "XOR $dst,$src" %} 8423 opcode(0x33); 8424 ins_encode( OpcP, RegReg( dst, src) ); 8425 ins_pipe( ialu_reg_reg ); 8426 %} 8427 8428 // Xor Register with Immediate -1 8429 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8430 match(Set dst (XorI dst imm)); 8431 8432 size(2); 8433 format %{ "NOT $dst" %} 8434 ins_encode %{ 8435 __ notl($dst$$Register); 8436 %} 8437 ins_pipe( ialu_reg ); 8438 %} 8439 8440 // Xor Register with Immediate 8441 instruct xorI_eReg_imm(rRegI dst, immI src, 
eFlagsReg cr) %{ 8442 match(Set dst (XorI dst src)); 8443 effect(KILL cr); 8444 8445 format %{ "XOR $dst,$src" %} 8446 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8447 // ins_encode( RegImm( dst, src) ); 8448 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8449 ins_pipe( ialu_reg ); 8450 %} 8451 8452 // Xor Register with Memory 8453 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8454 match(Set dst (XorI dst (LoadI src))); 8455 effect(KILL cr); 8456 8457 ins_cost(125); 8458 format %{ "XOR $dst,$src" %} 8459 opcode(0x33); 8460 ins_encode( OpcP, RegMem(dst, src) ); 8461 ins_pipe( ialu_reg_mem ); 8462 %} 8463 8464 // Xor Memory with Register 8465 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8466 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8467 effect(KILL cr); 8468 8469 ins_cost(150); 8470 format %{ "XOR $dst,$src" %} 8471 opcode(0x31); /* Opcode 31 /r */ 8472 ins_encode( OpcP, RegMem( src, dst ) ); 8473 ins_pipe( ialu_mem_reg ); 8474 %} 8475 8476 // Xor Memory with Immediate 8477 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8478 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8479 effect(KILL cr); 8480 8481 ins_cost(125); 8482 format %{ "XOR $dst,$src" %} 8483 opcode(0x81,0x6); /* Opcode 81 /6 id */ 8484 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8485 ins_pipe( ialu_mem_imm ); 8486 %} 8487 8488 //----------Convert Int to Boolean--------------------------------------------- 8489 8490 instruct movI_nocopy(rRegI dst, rRegI src) %{ 8491 effect( DEF dst, USE src ); 8492 format %{ "MOV $dst,$src" %} 8493 ins_encode( enc_Copy( dst, src) ); 8494 ins_pipe( ialu_reg_reg ); 8495 %} 8496 8497 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8498 effect( USE_DEF dst, USE src, KILL cr ); 8499 8500 size(4); 8501 format %{ "NEG $dst\n\t" 8502 "ADC $dst,$src" %} 8503 ins_encode( neg_reg(dst), 8504 OpcRegReg(0x13,dst,src) ); 8505 ins_pipe( ialu_reg_reg_long ); 8506 %} 8507 8508 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8509 match(Set dst (Conv2B src)); 8510 8511 expand %{ 8512 movI_nocopy(dst,src); 8513 ci2b(dst,src,cr); 8514 %} 8515 %} 8516 8517 instruct movP_nocopy(rRegI dst, eRegP src) %{ 8518 effect( DEF dst, USE src ); 8519 format %{ "MOV $dst,$src" %} 8520 ins_encode( enc_Copy( dst, src) ); 8521 ins_pipe( ialu_reg_reg ); 8522 %} 8523 8524 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8525 effect( USE_DEF dst, USE src, KILL cr ); 8526 format %{ "NEG $dst\n\t" 8527 "ADC $dst,$src" %} 8528 ins_encode( neg_reg(dst), 8529 OpcRegReg(0x13,dst,src) ); 8530 ins_pipe( ialu_reg_reg_long ); 8531 %} 8532 8533 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8534 match(Set dst (Conv2B src)); 8535 8536 expand %{ 8537 movP_nocopy(dst,src); 8538 cp2b(dst,src,cr); 8539 %} 8540 %} 8541 8542 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ 8543 match(Set dst (CmpLTMask p q)); 8544 effect(KILL cr); 8545 ins_cost(400); 8546 8547 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination 8548 format %{ "XOR $dst,$dst\n\t" 8549 "CMP $p,$q\n\t" 8550 "SETlt $dst\n\t" 8551 "NEG $dst" %} 8552 ins_encode %{ 8553 Register Rp = $p$$Register; 8554 Register Rq = $q$$Register; 8555 Register Rd = $dst$$Register; 8556 Label done; 8557 __ xorl(Rd, Rd); 8558 __ cmpl(Rp, Rq); 8559 __ setb(Assembler::less, Rd); 8560 __ negl(Rd); 8561 %} 8562 8563 ins_pipe(pipe_slow); 8564 %} 8565 8566 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{ 8567 match(Set dst (CmpLTMask dst 
zero)); 8568 effect(DEF dst, KILL cr); 8569 ins_cost(100); 8570 8571 format %{ "SAR $dst,31\t# cmpLTMask0" %} 8572 ins_encode %{ 8573 __ sarl($dst$$Register, 31); 8574 %} 8575 ins_pipe(ialu_reg); 8576 %} 8577 8578 /* better to save a register than avoid a branch */ 8579 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8580 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); 8581 effect(KILL cr); 8582 ins_cost(400); 8583 format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t" 8584 "JGE done\n\t" 8585 "ADD $p,$y\n" 8586 "done: " %} 8587 ins_encode %{ 8588 Register Rp = $p$$Register; 8589 Register Rq = $q$$Register; 8590 Register Ry = $y$$Register; 8591 Label done; 8592 __ subl(Rp, Rq); 8593 __ jccb(Assembler::greaterEqual, done); 8594 __ addl(Rp, Ry); 8595 __ bind(done); 8596 %} 8597 8598 ins_pipe(pipe_cmplt); 8599 %} 8600 8601 /* better to save a register than avoid a branch */ 8602 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8603 match(Set y (AndI (CmpLTMask p q) y)); 8604 effect(KILL cr); 8605 8606 ins_cost(300); 8607 8608 format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t" 8609 "JLT done\n\t" 8610 "XORL $y, $y\n" 8611 "done: " %} 8612 ins_encode %{ 8613 Register Rp = $p$$Register; 8614 Register Rq = $q$$Register; 8615 Register Ry = $y$$Register; 8616 Label done; 8617 __ cmpl(Rp, Rq); 8618 __ jccb(Assembler::less, done); 8619 __ xorl(Ry, Ry); 8620 __ bind(done); 8621 %} 8622 8623 ins_pipe(pipe_cmplt); 8624 %} 8625 8626 /* If I enable this, I encourage spilling in the inner loop of compress. 8627 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ 8628 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); 8629 */ 8630 //----------Overflow Math Instructions----------------------------------------- 8631 8632 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8633 %{ 8634 match(Set cr (OverflowAddI op1 op2)); 8635 effect(DEF cr, USE_KILL op1, USE op2); 8636 8637 format %{ "ADD $op1, $op2\t# overflow check int" %} 8638 8639 ins_encode %{ 8640 __ addl($op1$$Register, $op2$$Register); 8641 %} 8642 ins_pipe(ialu_reg_reg); 8643 %} 8644 8645 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) 8646 %{ 8647 match(Set cr (OverflowAddI op1 op2)); 8648 effect(DEF cr, USE_KILL op1, USE op2); 8649 8650 format %{ "ADD $op1, $op2\t# overflow check int" %} 8651 8652 ins_encode %{ 8653 __ addl($op1$$Register, $op2$$constant); 8654 %} 8655 ins_pipe(ialu_reg_reg); 8656 %} 8657 8658 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) 8659 %{ 8660 match(Set cr (OverflowSubI op1 op2)); 8661 8662 format %{ "CMP $op1, $op2\t# overflow check int" %} 8663 ins_encode %{ 8664 __ cmpl($op1$$Register, $op2$$Register); 8665 %} 8666 ins_pipe(ialu_reg_reg); 8667 %} 8668 8669 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) 8670 %{ 8671 match(Set cr (OverflowSubI op1 op2)); 8672 8673 format %{ "CMP $op1, $op2\t# overflow check int" %} 8674 ins_encode %{ 8675 __ cmpl($op1$$Register, $op2$$constant); 8676 %} 8677 ins_pipe(ialu_reg_reg); 8678 %} 8679 8680 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2) 8681 %{ 8682 match(Set cr (OverflowSubI zero op2)); 8683 effect(DEF cr, USE_KILL op2); 8684 8685 format %{ "NEG $op2\t# overflow check int" %} 8686 ins_encode %{ 8687 __ negl($op2$$Register); 8688 %} 8689 ins_pipe(ialu_reg_reg); 8690 %} 8691 8692 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8693 %{ 8694 match(Set cr (OverflowMulI op1 op2)); 8695 effect(DEF cr, 
USE_KILL op1, USE op2); 8696 8697 format %{ "IMUL $op1, $op2\t# overflow check int" %} 8698 ins_encode %{ 8699 __ imull($op1$$Register, $op2$$Register); 8700 %} 8701 ins_pipe(ialu_reg_reg_alu0); 8702 %} 8703 8704 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) 8705 %{ 8706 match(Set cr (OverflowMulI op1 op2)); 8707 effect(DEF cr, TEMP tmp, USE op1, USE op2); 8708 8709 format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} 8710 ins_encode %{ 8711 __ imull($tmp$$Register, $op1$$Register, $op2$$constant); 8712 %} 8713 ins_pipe(ialu_reg_reg_alu0); 8714 %} 8715 8716 //----------Long Instructions------------------------------------------------ 8717 // Add Long Register with Register 8718 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8719 match(Set dst (AddL dst src)); 8720 effect(KILL cr); 8721 ins_cost(200); 8722 format %{ "ADD $dst.lo,$src.lo\n\t" 8723 "ADC $dst.hi,$src.hi" %} 8724 opcode(0x03, 0x13); 8725 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8726 ins_pipe( ialu_reg_reg_long ); 8727 %} 8728 8729 // Add Long Register with Immediate 8730 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8731 match(Set dst (AddL dst src)); 8732 effect(KILL cr); 8733 format %{ "ADD $dst.lo,$src.lo\n\t" 8734 "ADC $dst.hi,$src.hi" %} 8735 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */ 8736 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8737 ins_pipe( ialu_reg_long ); 8738 %} 8739 8740 // Add Long Register with Memory 8741 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8742 match(Set dst (AddL dst (LoadL mem))); 8743 effect(KILL cr); 8744 ins_cost(125); 8745 format %{ "ADD $dst.lo,$mem\n\t" 8746 "ADC $dst.hi,$mem+4" %} 8747 opcode(0x03, 0x13); 8748 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8749 ins_pipe( ialu_reg_long_mem ); 8750 %} 8751 8752 // Subtract Long Register with Register. 
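// Sketch: as with the long add above, the subtract below is a carry-chained pair,
// because on 32-bit x86 a long occupies a register pair (hi:lo).  For a hypothetical
// a -= b on longs the emitted sequence is
//
//     SUB a.lo,b.lo     // sets/clears CF
//     SBB a.hi,b.hi     // borrows the carry into the high word
//
// which is also why every long add/sub rule in this section KILLs the flags register:
// the low-word op produces the carry that the high-word op immediately consumes.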
8753 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8754 match(Set dst (SubL dst src)); 8755 effect(KILL cr); 8756 ins_cost(200); 8757 format %{ "SUB $dst.lo,$src.lo\n\t" 8758 "SBB $dst.hi,$src.hi" %} 8759 opcode(0x2B, 0x1B); 8760 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8761 ins_pipe( ialu_reg_reg_long ); 8762 %} 8763 8764 // Subtract Long Register with Immediate 8765 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8766 match(Set dst (SubL dst src)); 8767 effect(KILL cr); 8768 format %{ "SUB $dst.lo,$src.lo\n\t" 8769 "SBB $dst.hi,$src.hi" %} 8770 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8771 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8772 ins_pipe( ialu_reg_long ); 8773 %} 8774 8775 // Subtract Long Register with Memory 8776 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8777 match(Set dst (SubL dst (LoadL mem))); 8778 effect(KILL cr); 8779 ins_cost(125); 8780 format %{ "SUB $dst.lo,$mem\n\t" 8781 "SBB $dst.hi,$mem+4" %} 8782 opcode(0x2B, 0x1B); 8783 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8784 ins_pipe( ialu_reg_long_mem ); 8785 %} 8786 8787 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8788 match(Set dst (SubL zero dst)); 8789 effect(KILL cr); 8790 ins_cost(300); 8791 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8792 ins_encode( neg_long(dst) ); 8793 ins_pipe( ialu_reg_reg_long ); 8794 %} 8795 8796 // And Long Register with Register 8797 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8798 match(Set dst (AndL dst src)); 8799 effect(KILL cr); 8800 format %{ "AND $dst.lo,$src.lo\n\t" 8801 "AND $dst.hi,$src.hi" %} 8802 opcode(0x23,0x23); 8803 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8804 ins_pipe( ialu_reg_reg_long ); 8805 %} 8806 8807 // And Long Register with Immediate 8808 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8809 match(Set dst (AndL dst src)); 8810 effect(KILL cr); 8811 format %{ "AND $dst.lo,$src.lo\n\t" 8812 "AND $dst.hi,$src.hi" %} 8813 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8814 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8815 ins_pipe( ialu_reg_long ); 8816 %} 8817 8818 // And Long Register with Memory 8819 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8820 match(Set dst (AndL dst (LoadL mem))); 8821 effect(KILL cr); 8822 ins_cost(125); 8823 format %{ "AND $dst.lo,$mem\n\t" 8824 "AND $dst.hi,$mem+4" %} 8825 opcode(0x23, 0x23); 8826 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8827 ins_pipe( ialu_reg_long_mem ); 8828 %} 8829 8830 // BMI1 instructions 8831 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 8832 match(Set dst (AndL (XorL src1 minus_1) src2)); 8833 predicate(UseBMI1Instructions); 8834 effect(KILL cr, TEMP dst); 8835 8836 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 8837 "ANDNL $dst.hi, $src1.hi, $src2.hi" 8838 %} 8839 8840 ins_encode %{ 8841 Register Rdst = $dst$$Register; 8842 Register Rsrc1 = $src1$$Register; 8843 Register Rsrc2 = $src2$$Register; 8844 __ andnl(Rdst, Rsrc1, Rsrc2); 8845 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 8846 %} 8847 ins_pipe(ialu_reg_reg_long); 8848 %} 8849 8850 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ 8851 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 8852 predicate(UseBMI1Instructions); 8853 
effect(KILL cr, TEMP dst); 8854 8855 ins_cost(125); 8856 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 8857 "ANDNL $dst.hi, $src1.hi, $src2+4" 8858 %} 8859 8860 ins_encode %{ 8861 Register Rdst = $dst$$Register; 8862 Register Rsrc1 = $src1$$Register; 8863 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 8864 8865 __ andnl(Rdst, Rsrc1, $src2$$Address); 8866 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); 8867 %} 8868 ins_pipe(ialu_reg_mem); 8869 %} 8870 8871 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 8872 match(Set dst (AndL (SubL imm_zero src) src)); 8873 predicate(UseBMI1Instructions); 8874 effect(KILL cr, TEMP dst); 8875 8876 format %{ "MOVL $dst.hi, 0\n\t" 8877 "BLSIL $dst.lo, $src.lo\n\t" 8878 "JNZ done\n\t" 8879 "BLSIL $dst.hi, $src.hi\n" 8880 "done:" 8881 %} 8882 8883 ins_encode %{ 8884 Label done; 8885 Register Rdst = $dst$$Register; 8886 Register Rsrc = $src$$Register; 8887 __ movl(HIGH_FROM_LOW(Rdst), 0); 8888 __ blsil(Rdst, Rsrc); 8889 __ jccb(Assembler::notZero, done); 8890 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8891 __ bind(done); 8892 %} 8893 ins_pipe(ialu_reg); 8894 %} 8895 8896 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 8897 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 8898 predicate(UseBMI1Instructions); 8899 effect(KILL cr, TEMP dst); 8900 8901 ins_cost(125); 8902 format %{ "MOVL $dst.hi, 0\n\t" 8903 "BLSIL $dst.lo, $src\n\t" 8904 "JNZ done\n\t" 8905 "BLSIL $dst.hi, $src+4\n" 8906 "done:" 8907 %} 8908 8909 ins_encode %{ 8910 Label done; 8911 Register Rdst = $dst$$Register; 8912 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8913 8914 __ movl(HIGH_FROM_LOW(Rdst), 0); 8915 __ blsil(Rdst, $src$$Address); 8916 __ jccb(Assembler::notZero, done); 8917 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 8918 __ bind(done); 8919 %} 8920 ins_pipe(ialu_reg_mem); 8921 %} 8922 8923 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8924 %{ 8925 match(Set dst (XorL (AddL src minus_1) src)); 8926 predicate(UseBMI1Instructions); 8927 effect(KILL cr, TEMP dst); 8928 8929 format %{ "MOVL $dst.hi, 0\n\t" 8930 "BLSMSKL $dst.lo, $src.lo\n\t" 8931 "JNC done\n\t" 8932 "BLSMSKL $dst.hi, $src.hi\n" 8933 "done:" 8934 %} 8935 8936 ins_encode %{ 8937 Label done; 8938 Register Rdst = $dst$$Register; 8939 Register Rsrc = $src$$Register; 8940 __ movl(HIGH_FROM_LOW(Rdst), 0); 8941 __ blsmskl(Rdst, Rsrc); 8942 __ jccb(Assembler::carryClear, done); 8943 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8944 __ bind(done); 8945 %} 8946 8947 ins_pipe(ialu_reg); 8948 %} 8949 8950 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8951 %{ 8952 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 8953 predicate(UseBMI1Instructions); 8954 effect(KILL cr, TEMP dst); 8955 8956 ins_cost(125); 8957 format %{ "MOVL $dst.hi, 0\n\t" 8958 "BLSMSKL $dst.lo, $src\n\t" 8959 "JNC done\n\t" 8960 "BLSMSKL $dst.hi, $src+4\n" 8961 "done:" 8962 %} 8963 8964 ins_encode %{ 8965 Label done; 8966 Register Rdst = $dst$$Register; 8967 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8968 8969 __ movl(HIGH_FROM_LOW(Rdst), 0); 8970 __ blsmskl(Rdst, $src$$Address); 8971 __ jccb(Assembler::carryClear, done); 8972 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 8973 __ bind(done); 8974 %} 
8975 8976 ins_pipe(ialu_reg_mem); 8977 %} 8978 8979 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8980 %{ 8981 match(Set dst (AndL (AddL src minus_1) src) ); 8982 predicate(UseBMI1Instructions); 8983 effect(KILL cr, TEMP dst); 8984 8985 format %{ "MOVL $dst.hi, $src.hi\n\t" 8986 "BLSRL $dst.lo, $src.lo\n\t" 8987 "JNC done\n\t" 8988 "BLSRL $dst.hi, $src.hi\n" 8989 "done:" 8990 %} 8991 8992 ins_encode %{ 8993 Label done; 8994 Register Rdst = $dst$$Register; 8995 Register Rsrc = $src$$Register; 8996 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8997 __ blsrl(Rdst, Rsrc); 8998 __ jccb(Assembler::carryClear, done); 8999 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9000 __ bind(done); 9001 %} 9002 9003 ins_pipe(ialu_reg); 9004 %} 9005 9006 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9007 %{ 9008 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); 9009 predicate(UseBMI1Instructions); 9010 effect(KILL cr, TEMP dst); 9011 9012 ins_cost(125); 9013 format %{ "MOVL $dst.hi, $src+4\n\t" 9014 "BLSRL $dst.lo, $src\n\t" 9015 "JNC done\n\t" 9016 "BLSRL $dst.hi, $src+4\n" 9017 "done:" 9018 %} 9019 9020 ins_encode %{ 9021 Label done; 9022 Register Rdst = $dst$$Register; 9023 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9024 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9025 __ blsrl(Rdst, $src$$Address); 9026 __ jccb(Assembler::carryClear, done); 9027 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9028 __ bind(done); 9029 %} 9030 9031 ins_pipe(ialu_reg_mem); 9032 %} 9033 9034 // Or Long Register with Register 9035 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9036 match(Set dst (OrL dst src)); 9037 effect(KILL cr); 9038 format %{ "OR $dst.lo,$src.lo\n\t" 9039 "OR $dst.hi,$src.hi" %} 9040 opcode(0x0B,0x0B); 9041 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9042 ins_pipe( ialu_reg_reg_long ); 9043 %} 9044 9045 // Or Long Register with Immediate 9046 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9047 match(Set dst (OrL dst src)); 9048 effect(KILL cr); 9049 format %{ "OR $dst.lo,$src.lo\n\t" 9050 "OR $dst.hi,$src.hi" %} 9051 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9052 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9053 ins_pipe( ialu_reg_long ); 9054 %} 9055 9056 // Or Long Register with Memory 9057 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9058 match(Set dst (OrL dst (LoadL mem))); 9059 effect(KILL cr); 9060 ins_cost(125); 9061 format %{ "OR $dst.lo,$mem\n\t" 9062 "OR $dst.hi,$mem+4" %} 9063 opcode(0x0B,0x0B); 9064 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9065 ins_pipe( ialu_reg_long_mem ); 9066 %} 9067 9068 // Xor Long Register with Register 9069 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9070 match(Set dst (XorL dst src)); 9071 effect(KILL cr); 9072 format %{ "XOR $dst.lo,$src.lo\n\t" 9073 "XOR $dst.hi,$src.hi" %} 9074 opcode(0x33,0x33); 9075 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9076 ins_pipe( ialu_reg_reg_long ); 9077 %} 9078 9079 // Xor Long Register with Immediate -1 9080 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9081 match(Set dst (XorL dst imm)); 9082 format %{ "NOT $dst.lo\n\t" 9083 "NOT $dst.hi" %} 9084 ins_encode %{ 9085 __ notl($dst$$Register); 9086 __ notl(HIGH_FROM_LOW($dst$$Register)); 9087 %} 9088 ins_pipe( ialu_reg_long ); 9089 %} 9090 9091 // Xor Long Register with Immediate 9092 instruct 
xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9093 match(Set dst (XorL dst src)); 9094 effect(KILL cr); 9095 format %{ "XOR $dst.lo,$src.lo\n\t" 9096 "XOR $dst.hi,$src.hi" %} 9097 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9098 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9099 ins_pipe( ialu_reg_long ); 9100 %} 9101 9102 // Xor Long Register with Memory 9103 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9104 match(Set dst (XorL dst (LoadL mem))); 9105 effect(KILL cr); 9106 ins_cost(125); 9107 format %{ "XOR $dst.lo,$mem\n\t" 9108 "XOR $dst.hi,$mem+4" %} 9109 opcode(0x33,0x33); 9110 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9111 ins_pipe( ialu_reg_long_mem ); 9112 %} 9113 9114 // Shift Left Long by 1 9115 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9116 predicate(UseNewLongLShift); 9117 match(Set dst (LShiftL dst cnt)); 9118 effect(KILL cr); 9119 ins_cost(100); 9120 format %{ "ADD $dst.lo,$dst.lo\n\t" 9121 "ADC $dst.hi,$dst.hi" %} 9122 ins_encode %{ 9123 __ addl($dst$$Register,$dst$$Register); 9124 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9125 %} 9126 ins_pipe( ialu_reg_long ); 9127 %} 9128 9129 // Shift Left Long by 2 9130 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9131 predicate(UseNewLongLShift); 9132 match(Set dst (LShiftL dst cnt)); 9133 effect(KILL cr); 9134 ins_cost(100); 9135 format %{ "ADD $dst.lo,$dst.lo\n\t" 9136 "ADC $dst.hi,$dst.hi\n\t" 9137 "ADD $dst.lo,$dst.lo\n\t" 9138 "ADC $dst.hi,$dst.hi" %} 9139 ins_encode %{ 9140 __ addl($dst$$Register,$dst$$Register); 9141 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9142 __ addl($dst$$Register,$dst$$Register); 9143 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9144 %} 9145 ins_pipe( ialu_reg_long ); 9146 %} 9147 9148 // Shift Left Long by 3 9149 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9150 predicate(UseNewLongLShift); 9151 match(Set dst (LShiftL dst cnt)); 9152 effect(KILL cr); 9153 ins_cost(100); 9154 format %{ "ADD $dst.lo,$dst.lo\n\t" 9155 "ADC $dst.hi,$dst.hi\n\t" 9156 "ADD $dst.lo,$dst.lo\n\t" 9157 "ADC $dst.hi,$dst.hi\n\t" 9158 "ADD $dst.lo,$dst.lo\n\t" 9159 "ADC $dst.hi,$dst.hi" %} 9160 ins_encode %{ 9161 __ addl($dst$$Register,$dst$$Register); 9162 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9163 __ addl($dst$$Register,$dst$$Register); 9164 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9165 __ addl($dst$$Register,$dst$$Register); 9166 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9167 %} 9168 ins_pipe( ialu_reg_long ); 9169 %} 9170 9171 // Shift Left Long by 1-31 9172 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9173 match(Set dst (LShiftL dst cnt)); 9174 effect(KILL cr); 9175 ins_cost(200); 9176 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9177 "SHL $dst.lo,$cnt" %} 9178 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9179 ins_encode( move_long_small_shift(dst,cnt) ); 9180 ins_pipe( ialu_reg_long ); 9181 %} 9182 9183 // Shift Left Long by 32-63 9184 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9185 match(Set dst (LShiftL dst cnt)); 9186 effect(KILL cr); 9187 ins_cost(300); 9188 format %{ "MOV $dst.hi,$dst.lo\n" 9189 "\tSHL $dst.hi,$cnt-32\n" 9190 "\tXOR $dst.lo,$dst.lo" %} 9191 opcode(0xC1, 0x4); /* C1 /4 ib */ 9192 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9193 ins_pipe( 
ialu_reg_long ); 9194 %} 9195 9196 // Shift Left Long by variable 9197 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9198 match(Set dst (LShiftL dst shift)); 9199 effect(KILL cr); 9200 ins_cost(500+200); 9201 size(17); 9202 format %{ "TEST $shift,32\n\t" 9203 "JEQ,s small\n\t" 9204 "MOV $dst.hi,$dst.lo\n\t" 9205 "XOR $dst.lo,$dst.lo\n" 9206 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9207 "SHL $dst.lo,$shift" %} 9208 ins_encode( shift_left_long( dst, shift ) ); 9209 ins_pipe( pipe_slow ); 9210 %} 9211 9212 // Shift Right Long by 1-31 9213 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9214 match(Set dst (URShiftL dst cnt)); 9215 effect(KILL cr); 9216 ins_cost(200); 9217 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9218 "SHR $dst.hi,$cnt" %} 9219 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9220 ins_encode( move_long_small_shift(dst,cnt) ); 9221 ins_pipe( ialu_reg_long ); 9222 %} 9223 9224 // Shift Right Long by 32-63 9225 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9226 match(Set dst (URShiftL dst cnt)); 9227 effect(KILL cr); 9228 ins_cost(300); 9229 format %{ "MOV $dst.lo,$dst.hi\n" 9230 "\tSHR $dst.lo,$cnt-32\n" 9231 "\tXOR $dst.hi,$dst.hi" %} 9232 opcode(0xC1, 0x5); /* C1 /5 ib */ 9233 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9234 ins_pipe( ialu_reg_long ); 9235 %} 9236 9237 // Shift Right Long by variable 9238 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9239 match(Set dst (URShiftL dst shift)); 9240 effect(KILL cr); 9241 ins_cost(600); 9242 size(17); 9243 format %{ "TEST $shift,32\n\t" 9244 "JEQ,s small\n\t" 9245 "MOV $dst.lo,$dst.hi\n\t" 9246 "XOR $dst.hi,$dst.hi\n" 9247 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9248 "SHR $dst.hi,$shift" %} 9249 ins_encode( shift_right_long( dst, shift ) ); 9250 ins_pipe( pipe_slow ); 9251 %} 9252 9253 // Shift Right Long by 1-31 9254 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9255 match(Set dst (RShiftL dst cnt)); 9256 effect(KILL cr); 9257 ins_cost(200); 9258 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9259 "SAR $dst.hi,$cnt" %} 9260 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9261 ins_encode( move_long_small_shift(dst,cnt) ); 9262 ins_pipe( ialu_reg_long ); 9263 %} 9264 9265 // Shift Right Long by 32-63 9266 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9267 match(Set dst (RShiftL dst cnt)); 9268 effect(KILL cr); 9269 ins_cost(300); 9270 format %{ "MOV $dst.lo,$dst.hi\n" 9271 "\tSAR $dst.lo,$cnt-32\n" 9272 "\tSAR $dst.hi,31" %} 9273 opcode(0xC1, 0x7); /* C1 /7 ib */ 9274 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9275 ins_pipe( ialu_reg_long ); 9276 %} 9277 9278 // Shift Right arithmetic Long by variable 9279 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9280 match(Set dst (RShiftL dst shift)); 9281 effect(KILL cr); 9282 ins_cost(600); 9283 size(18); 9284 format %{ "TEST $shift,32\n\t" 9285 "JEQ,s small\n\t" 9286 "MOV $dst.lo,$dst.hi\n\t" 9287 "SAR $dst.hi,31\n" 9288 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9289 "SAR $dst.hi,$shift" %} 9290 ins_encode( shift_right_arith_long( dst, shift ) ); 9291 ins_pipe( pipe_slow ); 9292 %} 9293 9294 9295 //----------Double Instructions------------------------------------------------ 9296 // Double Math 9297 9298 // Compare & branch 9299 9300 // P6 version of float compare, sets condition codes in EFLAGS 9301 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9302 predicate(VM_Version::supports_cmov() && UseSSE 
<=1); 9303 match(Set cr (CmpD src1 src2)); 9304 effect(KILL rax); 9305 ins_cost(150); 9306 format %{ "FLD $src1\n\t" 9307 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9308 "JNP exit\n\t" 9309 "MOV ah,1 // saw a NaN, set CF\n\t" 9310 "SAHF\n" 9311 "exit:\tNOP // avoid branch to branch" %} 9312 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9313 ins_encode( Push_Reg_DPR(src1), 9314 OpcP, RegOpc(src2), 9315 cmpF_P6_fixup ); 9316 ins_pipe( pipe_slow ); 9317 %} 9318 9319 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9320 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9321 match(Set cr (CmpD src1 src2)); 9322 ins_cost(150); 9323 format %{ "FLD $src1\n\t" 9324 "FUCOMIP ST,$src2 // P6 instruction" %} 9325 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9326 ins_encode( Push_Reg_DPR(src1), 9327 OpcP, RegOpc(src2)); 9328 ins_pipe( pipe_slow ); 9329 %} 9330 9331 // Compare & branch 9332 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9333 predicate(UseSSE<=1); 9334 match(Set cr (CmpD src1 src2)); 9335 effect(KILL rax); 9336 ins_cost(200); 9337 format %{ "FLD $src1\n\t" 9338 "FCOMp $src2\n\t" 9339 "FNSTSW AX\n\t" 9340 "TEST AX,0x400\n\t" 9341 "JZ,s flags\n\t" 9342 "MOV AH,1\t# unordered treat as LT\n" 9343 "flags:\tSAHF" %} 9344 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9345 ins_encode( Push_Reg_DPR(src1), 9346 OpcP, RegOpc(src2), 9347 fpu_flags); 9348 ins_pipe( pipe_slow ); 9349 %} 9350 9351 // Compare vs zero into -1,0,1 9352 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9353 predicate(UseSSE<=1); 9354 match(Set dst (CmpD3 src1 zero)); 9355 effect(KILL cr, KILL rax); 9356 ins_cost(280); 9357 format %{ "FTSTD $dst,$src1" %} 9358 opcode(0xE4, 0xD9); 9359 ins_encode( Push_Reg_DPR(src1), 9360 OpcS, OpcP, PopFPU, 9361 CmpF_Result(dst)); 9362 ins_pipe( pipe_slow ); 9363 %} 9364 9365 // Compare into -1,0,1 9366 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9367 predicate(UseSSE<=1); 9368 match(Set dst (CmpD3 src1 src2)); 9369 effect(KILL cr, KILL rax); 9370 ins_cost(300); 9371 format %{ "FCMPD $dst,$src1,$src2" %} 9372 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9373 ins_encode( Push_Reg_DPR(src1), 9374 OpcP, RegOpc(src2), 9375 CmpF_Result(dst)); 9376 ins_pipe( pipe_slow ); 9377 %} 9378 9379 // float compare and set condition codes in EFLAGS by XMM regs 9380 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9381 predicate(UseSSE>=2); 9382 match(Set cr (CmpD src1 src2)); 9383 ins_cost(145); 9384 format %{ "UCOMISD $src1,$src2\n\t" 9385 "JNP,s exit\n\t" 9386 "PUSHF\t# saw NaN, set CF\n\t" 9387 "AND [rsp], #0xffffff2b\n\t" 9388 "POPF\n" 9389 "exit:" %} 9390 ins_encode %{ 9391 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9392 emit_cmpfp_fixup(_masm); 9393 %} 9394 ins_pipe( pipe_slow ); 9395 %} 9396 9397 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9398 predicate(UseSSE>=2); 9399 match(Set cr (CmpD src1 src2)); 9400 ins_cost(100); 9401 format %{ "UCOMISD $src1,$src2" %} 9402 ins_encode %{ 9403 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9404 %} 9405 ins_pipe( pipe_slow ); 9406 %} 9407 9408 // float compare and set condition codes in EFLAGS by XMM regs 9409 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9410 predicate(UseSSE>=2); 9411 match(Set cr (CmpD src1 (LoadD src2))); 9412 ins_cost(145); 9413 format %{ "UCOMISD $src1,$src2\n\t" 9414 "JNP,s exit\n\t" 9415 "PUSHF\t# saw NaN, set CF\n\t" 9416 "AND [rsp], #0xffffff2b\n\t" 9417 "POPF\n" 
9418 "exit:" %} 9419 ins_encode %{ 9420 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9421 emit_cmpfp_fixup(_masm); 9422 %} 9423 ins_pipe( pipe_slow ); 9424 %} 9425 9426 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9427 predicate(UseSSE>=2); 9428 match(Set cr (CmpD src1 (LoadD src2))); 9429 ins_cost(100); 9430 format %{ "UCOMISD $src1,$src2" %} 9431 ins_encode %{ 9432 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9433 %} 9434 ins_pipe( pipe_slow ); 9435 %} 9436 9437 // Compare into -1,0,1 in XMM 9438 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9439 predicate(UseSSE>=2); 9440 match(Set dst (CmpD3 src1 src2)); 9441 effect(KILL cr); 9442 ins_cost(255); 9443 format %{ "UCOMISD $src1, $src2\n\t" 9444 "MOV $dst, #-1\n\t" 9445 "JP,s done\n\t" 9446 "JB,s done\n\t" 9447 "SETNE $dst\n\t" 9448 "MOVZB $dst, $dst\n" 9449 "done:" %} 9450 ins_encode %{ 9451 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9452 emit_cmpfp3(_masm, $dst$$Register); 9453 %} 9454 ins_pipe( pipe_slow ); 9455 %} 9456 9457 // Compare into -1,0,1 in XMM and memory 9458 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9459 predicate(UseSSE>=2); 9460 match(Set dst (CmpD3 src1 (LoadD src2))); 9461 effect(KILL cr); 9462 ins_cost(275); 9463 format %{ "UCOMISD $src1, $src2\n\t" 9464 "MOV $dst, #-1\n\t" 9465 "JP,s done\n\t" 9466 "JB,s done\n\t" 9467 "SETNE $dst\n\t" 9468 "MOVZB $dst, $dst\n" 9469 "done:" %} 9470 ins_encode %{ 9471 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9472 emit_cmpfp3(_masm, $dst$$Register); 9473 %} 9474 ins_pipe( pipe_slow ); 9475 %} 9476 9477 9478 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9479 predicate (UseSSE <=1); 9480 match(Set dst (SubD dst src)); 9481 9482 format %{ "FLD $src\n\t" 9483 "DSUBp $dst,ST" %} 9484 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9485 ins_cost(150); 9486 ins_encode( Push_Reg_DPR(src), 9487 OpcP, RegOpc(dst) ); 9488 ins_pipe( fpu_reg_reg ); 9489 %} 9490 9491 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9492 predicate (UseSSE <=1); 9493 match(Set dst (RoundDouble (SubD src1 src2))); 9494 ins_cost(250); 9495 9496 format %{ "FLD $src2\n\t" 9497 "DSUB ST,$src1\n\t" 9498 "FSTP_D $dst\t# D-round" %} 9499 opcode(0xD8, 0x5); 9500 ins_encode( Push_Reg_DPR(src2), 9501 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9502 ins_pipe( fpu_mem_reg_reg ); 9503 %} 9504 9505 9506 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9507 predicate (UseSSE <=1); 9508 match(Set dst (SubD dst (LoadD src))); 9509 ins_cost(150); 9510 9511 format %{ "FLD $src\n\t" 9512 "DSUBp $dst,ST" %} 9513 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9514 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9515 OpcP, RegOpc(dst) ); 9516 ins_pipe( fpu_reg_mem ); 9517 %} 9518 9519 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9520 predicate (UseSSE<=1); 9521 match(Set dst (AbsD src)); 9522 ins_cost(100); 9523 format %{ "FABS" %} 9524 opcode(0xE1, 0xD9); 9525 ins_encode( OpcS, OpcP ); 9526 ins_pipe( fpu_reg_reg ); 9527 %} 9528 9529 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9530 predicate(UseSSE<=1); 9531 match(Set dst (NegD src)); 9532 ins_cost(100); 9533 format %{ "FCHS" %} 9534 opcode(0xE0, 0xD9); 9535 ins_encode( OpcS, OpcP ); 9536 ins_pipe( fpu_reg_reg ); 9537 %} 9538 9539 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9540 predicate(UseSSE<=1); 9541 match(Set dst (AddD dst src)); 9542 format %{ "FLD $src\n\t" 9543 "DADD $dst,ST" %} 9544 size(4); 9545 ins_cost(150); 9546 opcode(0xDE, 0x0); /* DE C0+i or 
DE /0*/ 9547 ins_encode( Push_Reg_DPR(src), 9548 OpcP, RegOpc(dst) ); 9549 ins_pipe( fpu_reg_reg ); 9550 %} 9551 9552 9553 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9554 predicate(UseSSE<=1); 9555 match(Set dst (RoundDouble (AddD src1 src2))); 9556 ins_cost(250); 9557 9558 format %{ "FLD $src2\n\t" 9559 "DADD ST,$src1\n\t" 9560 "FSTP_D $dst\t# D-round" %} 9561 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9562 ins_encode( Push_Reg_DPR(src2), 9563 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9564 ins_pipe( fpu_mem_reg_reg ); 9565 %} 9566 9567 9568 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9569 predicate(UseSSE<=1); 9570 match(Set dst (AddD dst (LoadD src))); 9571 ins_cost(150); 9572 9573 format %{ "FLD $src\n\t" 9574 "DADDp $dst,ST" %} 9575 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9576 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9577 OpcP, RegOpc(dst) ); 9578 ins_pipe( fpu_reg_mem ); 9579 %} 9580 9581 // add-to-memory 9582 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9583 predicate(UseSSE<=1); 9584 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9585 ins_cost(150); 9586 9587 format %{ "FLD_D $dst\n\t" 9588 "DADD ST,$src\n\t" 9589 "FST_D $dst" %} 9590 opcode(0xDD, 0x0); 9591 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9592 Opcode(0xD8), RegOpc(src), 9593 set_instruction_start, 9594 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9595 ins_pipe( fpu_reg_mem ); 9596 %} 9597 9598 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9599 predicate(UseSSE<=1); 9600 match(Set dst (AddD dst con)); 9601 ins_cost(125); 9602 format %{ "FLD1\n\t" 9603 "DADDp $dst,ST" %} 9604 ins_encode %{ 9605 __ fld1(); 9606 __ faddp($dst$$reg); 9607 %} 9608 ins_pipe(fpu_reg); 9609 %} 9610 9611 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9612 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9613 match(Set dst (AddD dst con)); 9614 ins_cost(200); 9615 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9616 "DADDp $dst,ST" %} 9617 ins_encode %{ 9618 __ fld_d($constantaddress($con)); 9619 __ faddp($dst$$reg); 9620 %} 9621 ins_pipe(fpu_reg_mem); 9622 %} 9623 9624 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9625 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9626 match(Set dst (RoundDouble (AddD src con))); 9627 ins_cost(200); 9628 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9629 "DADD ST,$src\n\t" 9630 "FSTP_D $dst\t# D-round" %} 9631 ins_encode %{ 9632 __ fld_d($constantaddress($con)); 9633 __ fadd($src$$reg); 9634 __ fstp_d(Address(rsp, $dst$$disp)); 9635 %} 9636 ins_pipe(fpu_mem_reg_con); 9637 %} 9638 9639 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9640 predicate(UseSSE<=1); 9641 match(Set dst (MulD dst src)); 9642 format %{ "FLD $src\n\t" 9643 "DMULp $dst,ST" %} 9644 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9645 ins_cost(150); 9646 ins_encode( Push_Reg_DPR(src), 9647 OpcP, RegOpc(dst) ); 9648 ins_pipe( fpu_reg_reg ); 9649 %} 9650 9651 // Strict FP instruction biases argument before multiply then 9652 // biases result to avoid double rounding of subnormals. 
9653 // 9654 // scale arg1 by multiplying arg1 by 2^(-15360) 9655 // load arg2 9656 // multiply scaled arg1 by arg2 9657 // rescale product by 2^(15360) 9658 // 9659 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9660 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9661 match(Set dst (MulD dst src)); 9662 ins_cost(1); // Select this instruction for all strict FP double multiplies 9663 9664 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9665 "DMULp $dst,ST\n\t" 9666 "FLD $src\n\t" 9667 "DMULp $dst,ST\n\t" 9668 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9669 "DMULp $dst,ST\n\t" %} 9670 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9671 ins_encode( strictfp_bias1(dst), 9672 Push_Reg_DPR(src), 9673 OpcP, RegOpc(dst), 9674 strictfp_bias2(dst) ); 9675 ins_pipe( fpu_reg_reg ); 9676 %} 9677 9678 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9679 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9680 match(Set dst (MulD dst con)); 9681 ins_cost(200); 9682 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9683 "DMULp $dst,ST" %} 9684 ins_encode %{ 9685 __ fld_d($constantaddress($con)); 9686 __ fmulp($dst$$reg); 9687 %} 9688 ins_pipe(fpu_reg_mem); 9689 %} 9690 9691 9692 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9693 predicate( UseSSE<=1 ); 9694 match(Set dst (MulD dst (LoadD src))); 9695 ins_cost(200); 9696 format %{ "FLD_D $src\n\t" 9697 "DMULp $dst,ST" %} 9698 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9699 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9700 OpcP, RegOpc(dst) ); 9701 ins_pipe( fpu_reg_mem ); 9702 %} 9703 9704 // 9705 // Cisc-alternate to reg-reg multiply 9706 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ 9707 predicate( UseSSE<=1 ); 9708 match(Set dst (MulD src (LoadD mem))); 9709 ins_cost(250); 9710 format %{ "FLD_D $mem\n\t" 9711 "DMUL ST,$src\n\t" 9712 "FSTP_D $dst" %} 9713 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9714 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9715 OpcReg_FPR(src), 9716 Pop_Reg_DPR(dst) ); 9717 ins_pipe( fpu_reg_reg_mem ); 9718 %} 9719 9720 9721 // MACRO3 -- addDPR a mulDPR 9722 // This instruction is a '2-address' instruction in that the result goes 9723 // back to src2. This eliminates a move from the macro; possibly the 9724 // register allocator will have to add it back (and maybe not). 
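// In Java/C terms the expression shape matched below is
//   src2 = src0 * src1 + src2;   // MulD feeding AddD, destination shared with the addend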
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DADDp  $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD    $src\n\t"
            "FDIVp  $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all strict FP double divides

  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "FDIVp  $dst,ST\n\t"
            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD    $src1\n\t"
            "FDIV   ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD   $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB    ESP,8\t # DMOD\n"
          "\tMOVSD  [ESP+0],$src1\n"
          "\tFLD_D  [ESP+0]\n"
          "\tMOVSD  [ESP+0],$src0\n"
          "\tFLD_D  [ESP+0]\n"
          "loop:\tFPREM\n"
"\tFWAIT\n" 9836 "\tFNSTSW AX\n" 9837 "\tSAHF\n" 9838 "\tJP loop\n" 9839 "\tFSTP_D [ESP+0]\n" 9840 "\tMOVSD $dst,[ESP+0]\n" 9841 "\tADD ESP,8\n" 9842 "\tFSTP ST0\t # Restore FPU Stack" 9843 %} 9844 ins_cost(250); 9845 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9846 ins_pipe( pipe_slow ); 9847 %} 9848 9849 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{ 9850 predicate (UseSSE<=1); 9851 match(Set dst (SinD src)); 9852 ins_cost(1800); 9853 format %{ "DSIN $dst" %} 9854 opcode(0xD9, 0xFE); 9855 ins_encode( OpcP, OpcS ); 9856 ins_pipe( pipe_slow ); 9857 %} 9858 9859 instruct sinD_reg(regD dst, eFlagsReg cr) %{ 9860 predicate (UseSSE>=2); 9861 match(Set dst (SinD dst)); 9862 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9863 ins_cost(1800); 9864 format %{ "DSIN $dst" %} 9865 opcode(0xD9, 0xFE); 9866 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 9867 ins_pipe( pipe_slow ); 9868 %} 9869 9870 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{ 9871 predicate (UseSSE<=1); 9872 match(Set dst (CosD src)); 9873 ins_cost(1800); 9874 format %{ "DCOS $dst" %} 9875 opcode(0xD9, 0xFF); 9876 ins_encode( OpcP, OpcS ); 9877 ins_pipe( pipe_slow ); 9878 %} 9879 9880 instruct cosD_reg(regD dst, eFlagsReg cr) %{ 9881 predicate (UseSSE>=2); 9882 match(Set dst (CosD dst)); 9883 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9884 ins_cost(1800); 9885 format %{ "DCOS $dst" %} 9886 opcode(0xD9, 0xFF); 9887 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 9888 ins_pipe( pipe_slow ); 9889 %} 9890 9891 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ 9892 predicate (UseSSE<=1); 9893 match(Set dst(TanD src)); 9894 format %{ "DTAN $dst" %} 9895 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan 9896 Opcode(0xDD), Opcode(0xD8)); // fstp st 9897 ins_pipe( pipe_slow ); 9898 %} 9899 9900 instruct tanD_reg(regD dst, eFlagsReg cr) %{ 9901 predicate (UseSSE>=2); 9902 match(Set dst(TanD dst)); 9903 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9904 format %{ "DTAN $dst" %} 9905 ins_encode( Push_SrcD(dst), 9906 Opcode(0xD9), Opcode(0xF2), // fptan 9907 Opcode(0xDD), Opcode(0xD8), // fstp st 9908 Push_ResultD(dst) ); 9909 ins_pipe( pipe_slow ); 9910 %} 9911 9912 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 9913 predicate (UseSSE<=1); 9914 match(Set dst(AtanD dst src)); 9915 format %{ "DATA $dst,$src" %} 9916 opcode(0xD9, 0xF3); 9917 ins_encode( Push_Reg_DPR(src), 9918 OpcP, OpcS, RegOpc(dst) ); 9919 ins_pipe( pipe_slow ); 9920 %} 9921 9922 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9923 predicate (UseSSE>=2); 9924 match(Set dst(AtanD dst src)); 9925 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9926 format %{ "DATA $dst,$src" %} 9927 opcode(0xD9, 0xF3); 9928 ins_encode( Push_SrcD(src), 9929 OpcP, OpcS, Push_ResultD(dst) ); 9930 ins_pipe( pipe_slow ); 9931 %} 9932 9933 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9934 predicate (UseSSE<=1); 9935 match(Set dst (SqrtD src)); 9936 format %{ "DSQRT $dst,$src" %} 9937 opcode(0xFA, 0xD9); 9938 ins_encode( Push_Reg_DPR(src), 9939 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9940 ins_pipe( pipe_slow ); 9941 %} 9942 9943 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9944 predicate (UseSSE<=1); 9945 match(Set Y (PowD X Y)); // Raise X to the Yth power 9946 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9947 format %{ "fast_pow $X $Y -> $Y // KILL $rax, $rcx, $rdx" %} 9948 ins_encode %{ 9949 __ 
subptr(rsp, 8); 9950 __ fld_s($X$$reg - 1); 9951 __ fast_pow(); 9952 __ addptr(rsp, 8); 9953 %} 9954 ins_pipe( pipe_slow ); 9955 %} 9956 9957 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9958 predicate (UseSSE>=2); 9959 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power 9960 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9961 format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %} 9962 ins_encode %{ 9963 __ subptr(rsp, 8); 9964 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 9965 __ fld_d(Address(rsp, 0)); 9966 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 9967 __ fld_d(Address(rsp, 0)); 9968 __ fast_pow(); 9969 __ fstp_d(Address(rsp, 0)); 9970 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 9971 __ addptr(rsp, 8); 9972 %} 9973 ins_pipe( pipe_slow ); 9974 %} 9975 9976 9977 instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9978 predicate (UseSSE<=1); 9979 match(Set dpr1 (ExpD dpr1)); 9980 effect(KILL rax, KILL rcx, KILL rdx, KILL cr); 9981 format %{ "fast_exp $dpr1 -> $dpr1 // KILL $rax, $rcx, $rdx" %} 9982 ins_encode %{ 9983 __ fast_exp(); 9984 %} 9985 ins_pipe( pipe_slow ); 9986 %} 9987 9988 instruct expD_reg(regD dst, regD src, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9989 predicate (UseSSE>=2); 9990 match(Set dst (ExpD src)); 9991 effect(KILL rax, KILL rcx, KILL rdx, KILL cr); 9992 format %{ "fast_exp $dst -> $src // KILL $rax, $rcx, $rdx" %} 9993 ins_encode %{ 9994 __ subptr(rsp, 8); 9995 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 9996 __ fld_d(Address(rsp, 0)); 9997 __ fast_exp(); 9998 __ fstp_d(Address(rsp, 0)); 9999 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 10000 __ addptr(rsp, 8); 10001 %} 10002 ins_pipe( pipe_slow ); 10003 %} 10004 10005 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ 10006 predicate (UseSSE<=1); 10007 // The source Double operand on FPU stack 10008 match(Set dst (Log10D src)); 10009 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 10010 // fxch ; swap ST(0) with ST(1) 10011 // fyl2x ; compute log_10(2) * log_2(x) 10012 format %{ "FLDLG2 \t\t\t#Log10\n\t" 10013 "FXCH \n\t" 10014 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 10015 %} 10016 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 10017 Opcode(0xD9), Opcode(0xC9), // fxch 10018 Opcode(0xD9), Opcode(0xF1)); // fyl2x 10019 10020 ins_pipe( pipe_slow ); 10021 %} 10022 10023 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{ 10024 predicate (UseSSE>=2); 10025 effect(KILL cr); 10026 match(Set dst (Log10D src)); 10027 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 10028 // fyl2x ; compute log_10(2) * log_2(x) 10029 format %{ "FLDLG2 \t\t\t#Log10\n\t" 10030 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 10031 %} 10032 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 10033 Push_SrcD(src), 10034 Opcode(0xD9), Opcode(0xF1), // fyl2x 10035 Push_ResultD(dst)); 10036 10037 ins_pipe( pipe_slow ); 10038 %} 10039 10040 instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{ 10041 predicate (UseSSE<=1); 10042 // The source Double operand on FPU stack 10043 match(Set dst (LogD src)); 10044 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number 10045 // fxch ; swap ST(0) with ST(1) 10046 // fyl2x ; compute log_e(2) * log_2(x) 10047 format %{ "FLDLN2 \t\t\t#Log_e\n\t" 10048 "FXCH \n\t" 10049 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 10050 %} 10051 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 10052 Opcode(0xD9), Opcode(0xC9), // fxch 10053 Opcode(0xD9), 
Opcode(0xF1)); // fyl2x 10054 10055 ins_pipe( pipe_slow ); 10056 %} 10057 10058 instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{ 10059 predicate (UseSSE>=2); 10060 effect(KILL cr); 10061 // The source and result Double operands in XMM registers 10062 match(Set dst (LogD src)); 10063 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number 10064 // fyl2x ; compute log_e(2) * log_2(x) 10065 format %{ "FLDLN2 \t\t\t#Log_e\n\t" 10066 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 10067 %} 10068 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 10069 Push_SrcD(src), 10070 Opcode(0xD9), Opcode(0xF1), // fyl2x 10071 Push_ResultD(dst)); 10072 ins_pipe( pipe_slow ); 10073 %} 10074 10075 //-------------Float Instructions------------------------------- 10076 // Float Math 10077 10078 // Code for float compare: 10079 // fcompp(); 10080 // fwait(); fnstsw_ax(); 10081 // sahf(); 10082 // movl(dst, unordered_result); 10083 // jcc(Assembler::parity, exit); 10084 // movl(dst, less_result); 10085 // jcc(Assembler::below, exit); 10086 // movl(dst, equal_result); 10087 // jcc(Assembler::equal, exit); 10088 // movl(dst, greater_result); 10089 // exit: 10090 10091 // P6 version of float compare, sets condition codes in EFLAGS 10092 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10093 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10094 match(Set cr (CmpF src1 src2)); 10095 effect(KILL rax); 10096 ins_cost(150); 10097 format %{ "FLD $src1\n\t" 10098 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10099 "JNP exit\n\t" 10100 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10101 "SAHF\n" 10102 "exit:\tNOP // avoid branch to branch" %} 10103 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10104 ins_encode( Push_Reg_DPR(src1), 10105 OpcP, RegOpc(src2), 10106 cmpF_P6_fixup ); 10107 ins_pipe( pipe_slow ); 10108 %} 10109 10110 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 10111 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10112 match(Set cr (CmpF src1 src2)); 10113 ins_cost(100); 10114 format %{ "FLD $src1\n\t" 10115 "FUCOMIP ST,$src2 // P6 instruction" %} 10116 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10117 ins_encode( Push_Reg_DPR(src1), 10118 OpcP, RegOpc(src2)); 10119 ins_pipe( pipe_slow ); 10120 %} 10121 10122 10123 // Compare & branch 10124 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10125 predicate(UseSSE == 0); 10126 match(Set cr (CmpF src1 src2)); 10127 effect(KILL rax); 10128 ins_cost(200); 10129 format %{ "FLD $src1\n\t" 10130 "FCOMp $src2\n\t" 10131 "FNSTSW AX\n\t" 10132 "TEST AX,0x400\n\t" 10133 "JZ,s flags\n\t" 10134 "MOV AH,1\t# unordered treat as LT\n" 10135 "flags:\tSAHF" %} 10136 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10137 ins_encode( Push_Reg_DPR(src1), 10138 OpcP, RegOpc(src2), 10139 fpu_flags); 10140 ins_pipe( pipe_slow ); 10141 %} 10142 10143 // Compare vs zero into -1,0,1 10144 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10145 predicate(UseSSE == 0); 10146 match(Set dst (CmpF3 src1 zero)); 10147 effect(KILL cr, KILL rax); 10148 ins_cost(280); 10149 format %{ "FTSTF $dst,$src1" %} 10150 opcode(0xE4, 0xD9); 10151 ins_encode( Push_Reg_DPR(src1), 10152 OpcS, OpcP, PopFPU, 10153 CmpF_Result(dst)); 10154 ins_pipe( pipe_slow ); 10155 %} 10156 10157 // Compare into -1,0,1 10158 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10159 predicate(UseSSE == 0); 10160 match(Set dst (CmpF3 src1 src2)); 10161 effect(KILL cr, KILL rax); 
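  // CmpF3 produces -1, 0 or +1, with an unordered compare (NaN operand)
  // treated here as "less than"; in C terms the intended result is
  //   (src1 > src2) ? 1 : ((src1 == src2) ? 0 : -1)   // NaN falls through to -1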
10162 ins_cost(300); 10163 format %{ "FCMPF $dst,$src1,$src2" %} 10164 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10165 ins_encode( Push_Reg_DPR(src1), 10166 OpcP, RegOpc(src2), 10167 CmpF_Result(dst)); 10168 ins_pipe( pipe_slow ); 10169 %} 10170 10171 // float compare and set condition codes in EFLAGS by XMM regs 10172 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10173 predicate(UseSSE>=1); 10174 match(Set cr (CmpF src1 src2)); 10175 ins_cost(145); 10176 format %{ "UCOMISS $src1,$src2\n\t" 10177 "JNP,s exit\n\t" 10178 "PUSHF\t# saw NaN, set CF\n\t" 10179 "AND [rsp], #0xffffff2b\n\t" 10180 "POPF\n" 10181 "exit:" %} 10182 ins_encode %{ 10183 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10184 emit_cmpfp_fixup(_masm); 10185 %} 10186 ins_pipe( pipe_slow ); 10187 %} 10188 10189 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10190 predicate(UseSSE>=1); 10191 match(Set cr (CmpF src1 src2)); 10192 ins_cost(100); 10193 format %{ "UCOMISS $src1,$src2" %} 10194 ins_encode %{ 10195 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10196 %} 10197 ins_pipe( pipe_slow ); 10198 %} 10199 10200 // float compare and set condition codes in EFLAGS by XMM regs 10201 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10202 predicate(UseSSE>=1); 10203 match(Set cr (CmpF src1 (LoadF src2))); 10204 ins_cost(165); 10205 format %{ "UCOMISS $src1,$src2\n\t" 10206 "JNP,s exit\n\t" 10207 "PUSHF\t# saw NaN, set CF\n\t" 10208 "AND [rsp], #0xffffff2b\n\t" 10209 "POPF\n" 10210 "exit:" %} 10211 ins_encode %{ 10212 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10213 emit_cmpfp_fixup(_masm); 10214 %} 10215 ins_pipe( pipe_slow ); 10216 %} 10217 10218 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10219 predicate(UseSSE>=1); 10220 match(Set cr (CmpF src1 (LoadF src2))); 10221 ins_cost(100); 10222 format %{ "UCOMISS $src1,$src2" %} 10223 ins_encode %{ 10224 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10225 %} 10226 ins_pipe( pipe_slow ); 10227 %} 10228 10229 // Compare into -1,0,1 in XMM 10230 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10231 predicate(UseSSE>=1); 10232 match(Set dst (CmpF3 src1 src2)); 10233 effect(KILL cr); 10234 ins_cost(255); 10235 format %{ "UCOMISS $src1, $src2\n\t" 10236 "MOV $dst, #-1\n\t" 10237 "JP,s done\n\t" 10238 "JB,s done\n\t" 10239 "SETNE $dst\n\t" 10240 "MOVZB $dst, $dst\n" 10241 "done:" %} 10242 ins_encode %{ 10243 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10244 emit_cmpfp3(_masm, $dst$$Register); 10245 %} 10246 ins_pipe( pipe_slow ); 10247 %} 10248 10249 // Compare into -1,0,1 in XMM and memory 10250 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10251 predicate(UseSSE>=1); 10252 match(Set dst (CmpF3 src1 (LoadF src2))); 10253 effect(KILL cr); 10254 ins_cost(275); 10255 format %{ "UCOMISS $src1, $src2\n\t" 10256 "MOV $dst, #-1\n\t" 10257 "JP,s done\n\t" 10258 "JB,s done\n\t" 10259 "SETNE $dst\n\t" 10260 "MOVZB $dst, $dst\n" 10261 "done:" %} 10262 ins_encode %{ 10263 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10264 emit_cmpfp3(_masm, $dst$$Register); 10265 %} 10266 ins_pipe( pipe_slow ); 10267 %} 10268 10269 // Spill to obtain 24-bit precision 10270 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10271 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10272 match(Set dst (SubF src1 src2)); 10273 10274 format %{ "FSUB $dst,$src1 - $src2" %} 10275 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10276 
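  // The "24-bit" variants force the x87 result through a 32-bit stack slot:
  // the FSTP_S performed by Pop_Mem_FPR rounds the extended-precision value
  // to IEEE single, roughly the effect of
  //   float f = (float)((double)src1 - (double)src2);
  // which is what Java float semantics need when the result cannot be kept
  // at register precision.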
ins_encode( Push_Reg_FPR(src1), 10277 OpcReg_FPR(src2), 10278 Pop_Mem_FPR(dst) ); 10279 ins_pipe( fpu_mem_reg_reg ); 10280 %} 10281 // 10282 // This instruction does not round to 24-bits 10283 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10284 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10285 match(Set dst (SubF dst src)); 10286 10287 format %{ "FSUB $dst,$src" %} 10288 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10289 ins_encode( Push_Reg_FPR(src), 10290 OpcP, RegOpc(dst) ); 10291 ins_pipe( fpu_reg_reg ); 10292 %} 10293 10294 // Spill to obtain 24-bit precision 10295 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10296 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10297 match(Set dst (AddF src1 src2)); 10298 10299 format %{ "FADD $dst,$src1,$src2" %} 10300 opcode(0xD8, 0x0); /* D8 C0+i */ 10301 ins_encode( Push_Reg_FPR(src2), 10302 OpcReg_FPR(src1), 10303 Pop_Mem_FPR(dst) ); 10304 ins_pipe( fpu_mem_reg_reg ); 10305 %} 10306 // 10307 // This instruction does not round to 24-bits 10308 instruct addFPR_reg(regFPR dst, regFPR src) %{ 10309 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10310 match(Set dst (AddF dst src)); 10311 10312 format %{ "FLD $src\n\t" 10313 "FADDp $dst,ST" %} 10314 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10315 ins_encode( Push_Reg_FPR(src), 10316 OpcP, RegOpc(dst) ); 10317 ins_pipe( fpu_reg_reg ); 10318 %} 10319 10320 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10321 predicate(UseSSE==0); 10322 match(Set dst (AbsF src)); 10323 ins_cost(100); 10324 format %{ "FABS" %} 10325 opcode(0xE1, 0xD9); 10326 ins_encode( OpcS, OpcP ); 10327 ins_pipe( fpu_reg_reg ); 10328 %} 10329 10330 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10331 predicate(UseSSE==0); 10332 match(Set dst (NegF src)); 10333 ins_cost(100); 10334 format %{ "FCHS" %} 10335 opcode(0xE0, 0xD9); 10336 ins_encode( OpcS, OpcP ); 10337 ins_pipe( fpu_reg_reg ); 10338 %} 10339 10340 // Cisc-alternate to addFPR_reg 10341 // Spill to obtain 24-bit precision 10342 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10343 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10344 match(Set dst (AddF src1 (LoadF src2))); 10345 10346 format %{ "FLD $src2\n\t" 10347 "FADD ST,$src1\n\t" 10348 "FSTP_S $dst" %} 10349 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10350 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10351 OpcReg_FPR(src1), 10352 Pop_Mem_FPR(dst) ); 10353 ins_pipe( fpu_mem_reg_mem ); 10354 %} 10355 // 10356 // Cisc-alternate to addFPR_reg 10357 // This instruction does not round to 24-bits 10358 instruct addFPR_reg_mem(regFPR dst, memory src) %{ 10359 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10360 match(Set dst (AddF dst (LoadF src))); 10361 10362 format %{ "FADD $dst,$src" %} 10363 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10364 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10365 OpcP, RegOpc(dst) ); 10366 ins_pipe( fpu_reg_mem ); 10367 %} 10368 10369 // // Following two instructions for _222_mpegaudio 10370 // Spill to obtain 24-bit precision 10371 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 10372 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10373 match(Set dst (AddF src1 src2)); 10374 10375 format %{ "FADD $dst,$src1,$src2" %} 10376 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10377 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10378 
OpcReg_FPR(src2), 10379 Pop_Mem_FPR(dst) ); 10380 ins_pipe( fpu_mem_reg_mem ); 10381 %} 10382 10383 // Cisc-spill variant 10384 // Spill to obtain 24-bit precision 10385 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10386 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10387 match(Set dst (AddF src1 (LoadF src2))); 10388 10389 format %{ "FADD $dst,$src1,$src2 cisc" %} 10390 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10391 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10392 set_instruction_start, 10393 OpcP, RMopc_Mem(secondary,src1), 10394 Pop_Mem_FPR(dst) ); 10395 ins_pipe( fpu_mem_mem_mem ); 10396 %} 10397 10398 // Spill to obtain 24-bit precision 10399 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10400 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10401 match(Set dst (AddF src1 src2)); 10402 10403 format %{ "FADD $dst,$src1,$src2" %} 10404 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10405 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10406 set_instruction_start, 10407 OpcP, RMopc_Mem(secondary,src1), 10408 Pop_Mem_FPR(dst) ); 10409 ins_pipe( fpu_mem_mem_mem ); 10410 %} 10411 10412 10413 // Spill to obtain 24-bit precision 10414 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10415 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10416 match(Set dst (AddF src con)); 10417 format %{ "FLD $src\n\t" 10418 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10419 "FSTP_S $dst" %} 10420 ins_encode %{ 10421 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10422 __ fadd_s($constantaddress($con)); 10423 __ fstp_s(Address(rsp, $dst$$disp)); 10424 %} 10425 ins_pipe(fpu_mem_reg_con); 10426 %} 10427 // 10428 // This instruction does not round to 24-bits 10429 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10430 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10431 match(Set dst (AddF src con)); 10432 format %{ "FLD $src\n\t" 10433 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10434 "FSTP $dst" %} 10435 ins_encode %{ 10436 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10437 __ fadd_s($constantaddress($con)); 10438 __ fstp_d($dst$$reg); 10439 %} 10440 ins_pipe(fpu_reg_reg_con); 10441 %} 10442 10443 // Spill to obtain 24-bit precision 10444 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10445 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10446 match(Set dst (MulF src1 src2)); 10447 10448 format %{ "FLD $src1\n\t" 10449 "FMUL $src2\n\t" 10450 "FSTP_S $dst" %} 10451 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10452 ins_encode( Push_Reg_FPR(src1), 10453 OpcReg_FPR(src2), 10454 Pop_Mem_FPR(dst) ); 10455 ins_pipe( fpu_mem_reg_reg ); 10456 %} 10457 // 10458 // This instruction does not round to 24-bits 10459 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10460 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10461 match(Set dst (MulF src1 src2)); 10462 10463 format %{ "FLD $src1\n\t" 10464 "FMUL $src2\n\t" 10465 "FSTP_S $dst" %} 10466 opcode(0xD8, 0x1); /* D8 C8+i */ 10467 ins_encode( Push_Reg_FPR(src2), 10468 OpcReg_FPR(src1), 10469 Pop_Reg_FPR(dst) ); 10470 ins_pipe( fpu_reg_reg_reg ); 10471 %} 10472 10473 10474 // Spill to obtain 24-bit precision 10475 // Cisc-alternate to reg-reg multiply 10476 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10477 
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10478 match(Set dst (MulF src1 (LoadF src2))); 10479 10480 format %{ "FLD_S $src2\n\t" 10481 "FMUL $src1\n\t" 10482 "FSTP_S $dst" %} 10483 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10484 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10485 OpcReg_FPR(src1), 10486 Pop_Mem_FPR(dst) ); 10487 ins_pipe( fpu_mem_reg_mem ); 10488 %} 10489 // 10490 // This instruction does not round to 24-bits 10491 // Cisc-alternate to reg-reg multiply 10492 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10493 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10494 match(Set dst (MulF src1 (LoadF src2))); 10495 10496 format %{ "FMUL $dst,$src1,$src2" %} 10497 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10498 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10499 OpcReg_FPR(src1), 10500 Pop_Reg_FPR(dst) ); 10501 ins_pipe( fpu_reg_reg_mem ); 10502 %} 10503 10504 // Spill to obtain 24-bit precision 10505 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10506 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10507 match(Set dst (MulF src1 src2)); 10508 10509 format %{ "FMUL $dst,$src1,$src2" %} 10510 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10511 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10512 set_instruction_start, 10513 OpcP, RMopc_Mem(secondary,src1), 10514 Pop_Mem_FPR(dst) ); 10515 ins_pipe( fpu_mem_mem_mem ); 10516 %} 10517 10518 // Spill to obtain 24-bit precision 10519 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10520 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10521 match(Set dst (MulF src con)); 10522 10523 format %{ "FLD $src\n\t" 10524 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10525 "FSTP_S $dst" %} 10526 ins_encode %{ 10527 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10528 __ fmul_s($constantaddress($con)); 10529 __ fstp_s(Address(rsp, $dst$$disp)); 10530 %} 10531 ins_pipe(fpu_mem_reg_con); 10532 %} 10533 // 10534 // This instruction does not round to 24-bits 10535 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10536 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10537 match(Set dst (MulF src con)); 10538 10539 format %{ "FLD $src\n\t" 10540 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10541 "FSTP $dst" %} 10542 ins_encode %{ 10543 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10544 __ fmul_s($constantaddress($con)); 10545 __ fstp_d($dst$$reg); 10546 %} 10547 ins_pipe(fpu_reg_reg_con); 10548 %} 10549 10550 10551 // 10552 // MACRO1 -- subsume unshared load into mulFPR 10553 // This instruction does not round to 24-bits 10554 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 10555 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10556 match(Set dst (MulF (LoadF mem1) src)); 10557 10558 format %{ "FLD $mem1 ===MACRO1===\n\t" 10559 "FMUL ST,$src\n\t" 10560 "FSTP $dst" %} 10561 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10562 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10563 OpcReg_FPR(src), 10564 Pop_Reg_FPR(dst) ); 10565 ins_pipe( fpu_reg_reg_mem ); 10566 %} 10567 // 10568 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load 10569 // This instruction does not round to 24-bits 10570 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 10571 predicate(UseSSE==0 && 
!Compile::current()->select_24_bit_instr()); 10572 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10573 ins_cost(95); 10574 10575 format %{ "FLD $mem1 ===MACRO2===\n\t" 10576 "FMUL ST,$src1 subsume mulFPR left load\n\t" 10577 "FADD ST,$src2\n\t" 10578 "FSTP $dst" %} 10579 opcode(0xD9); /* LoadF D9 /0 */ 10580 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10581 FMul_ST_reg(src1), 10582 FAdd_ST_reg(src2), 10583 Pop_Reg_FPR(dst) ); 10584 ins_pipe( fpu_reg_mem_reg_reg ); 10585 %} 10586 10587 // MACRO3 -- addFPR a mulFPR 10588 // This instruction does not round to 24-bits. It is a '2-address' 10589 // instruction in that the result goes back to src2. This eliminates 10590 // a move from the macro; possibly the register allocator will have 10591 // to add it back (and maybe not). 10592 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ 10593 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10594 match(Set src2 (AddF (MulF src0 src1) src2)); 10595 10596 format %{ "FLD $src0 ===MACRO3===\n\t" 10597 "FMUL ST,$src1\n\t" 10598 "FADDP $src2,ST" %} 10599 opcode(0xD9); /* LoadF D9 /0 */ 10600 ins_encode( Push_Reg_FPR(src0), 10601 FMul_ST_reg(src1), 10602 FAddP_reg_ST(src2) ); 10603 ins_pipe( fpu_reg_reg_reg ); 10604 %} 10605 10606 // MACRO4 -- divFPR subFPR 10607 // This instruction does not round to 24-bits 10608 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ 10609 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10610 match(Set dst (DivF (SubF src2 src1) src3)); 10611 10612 format %{ "FLD $src2 ===MACRO4===\n\t" 10613 "FSUB ST,$src1\n\t" 10614 "FDIV ST,$src3\n\t" 10615 "FSTP $dst" %} 10616 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10617 ins_encode( Push_Reg_FPR(src2), 10618 subFPR_divFPR_encode(src1,src3), 10619 Pop_Reg_FPR(dst) ); 10620 ins_pipe( fpu_reg_reg_reg_reg ); 10621 %} 10622 10623 // Spill to obtain 24-bit precision 10624 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10625 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10626 match(Set dst (DivF src1 src2)); 10627 10628 format %{ "FDIV $dst,$src1,$src2" %} 10629 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ 10630 ins_encode( Push_Reg_FPR(src1), 10631 OpcReg_FPR(src2), 10632 Pop_Mem_FPR(dst) ); 10633 ins_pipe( fpu_mem_reg_reg ); 10634 %} 10635 // 10636 // This instruction does not round to 24-bits 10637 instruct divFPR_reg(regFPR dst, regFPR src) %{ 10638 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10639 match(Set dst (DivF dst src)); 10640 10641 format %{ "FDIV $dst,$src" %} 10642 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10643 ins_encode( Push_Reg_FPR(src), 10644 OpcP, RegOpc(dst) ); 10645 ins_pipe( fpu_reg_reg ); 10646 %} 10647 10648 10649 // Spill to obtain 24-bit precision 10650 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10651 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10652 match(Set dst (ModF src1 src2)); 10653 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10654 10655 format %{ "FMOD $dst,$src1,$src2" %} 10656 ins_encode( Push_Reg_Mod_DPR(src1, src2), 10657 emitModDPR(), 10658 Push_Result_Mod_DPR(src2), 10659 Pop_Mem_FPR(dst)); 10660 ins_pipe( pipe_slow ); 10661 %} 10662 // 10663 // This instruction does not round to 24-bits 10664 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ 10665 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10666 match(Set dst 
(ModF dst src)); 10667 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10668 10669 format %{ "FMOD $dst,$src" %} 10670 ins_encode(Push_Reg_Mod_DPR(dst, src), 10671 emitModDPR(), 10672 Push_Result_Mod_DPR(src), 10673 Pop_Reg_FPR(dst)); 10674 ins_pipe( pipe_slow ); 10675 %} 10676 10677 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ 10678 predicate(UseSSE>=1); 10679 match(Set dst (ModF src0 src1)); 10680 effect(KILL rax, KILL cr); 10681 format %{ "SUB ESP,4\t # FMOD\n" 10682 "\tMOVSS [ESP+0],$src1\n" 10683 "\tFLD_S [ESP+0]\n" 10684 "\tMOVSS [ESP+0],$src0\n" 10685 "\tFLD_S [ESP+0]\n" 10686 "loop:\tFPREM\n" 10687 "\tFWAIT\n" 10688 "\tFNSTSW AX\n" 10689 "\tSAHF\n" 10690 "\tJP loop\n" 10691 "\tFSTP_S [ESP+0]\n" 10692 "\tMOVSS $dst,[ESP+0]\n" 10693 "\tADD ESP,4\n" 10694 "\tFSTP ST0\t # Restore FPU Stack" 10695 %} 10696 ins_cost(250); 10697 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); 10698 ins_pipe( pipe_slow ); 10699 %} 10700 10701 10702 //----------Arithmetic Conversion Instructions--------------------------------- 10703 // The conversions operations are all Alpha sorted. Please keep it that way! 10704 10705 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ 10706 predicate(UseSSE==0); 10707 match(Set dst (RoundFloat src)); 10708 ins_cost(125); 10709 format %{ "FST_S $dst,$src\t# F-round" %} 10710 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 10711 ins_pipe( fpu_mem_reg ); 10712 %} 10713 10714 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ 10715 predicate(UseSSE<=1); 10716 match(Set dst (RoundDouble src)); 10717 ins_cost(125); 10718 format %{ "FST_D $dst,$src\t# D-round" %} 10719 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 10720 ins_pipe( fpu_mem_reg ); 10721 %} 10722 10723 // Force rounding to 24-bit precision and 6-bit exponent 10724 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{ 10725 predicate(UseSSE==0); 10726 match(Set dst (ConvD2F src)); 10727 format %{ "FST_S $dst,$src\t# F-round" %} 10728 expand %{ 10729 roundFloat_mem_reg(dst,src); 10730 %} 10731 %} 10732 10733 // Force rounding to 24-bit precision and 6-bit exponent 10734 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{ 10735 predicate(UseSSE==1); 10736 match(Set dst (ConvD2F src)); 10737 effect( KILL cr ); 10738 format %{ "SUB ESP,4\n\t" 10739 "FST_S [ESP],$src\t# F-round\n\t" 10740 "MOVSS $dst,[ESP]\n\t" 10741 "ADD ESP,4" %} 10742 ins_encode %{ 10743 __ subptr(rsp, 4); 10744 if ($src$$reg != FPR1L_enc) { 10745 __ fld_s($src$$reg-1); 10746 __ fstp_s(Address(rsp, 0)); 10747 } else { 10748 __ fst_s(Address(rsp, 0)); 10749 } 10750 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 10751 __ addptr(rsp, 4); 10752 %} 10753 ins_pipe( pipe_slow ); 10754 %} 10755 10756 // Force rounding double precision to single precision 10757 instruct convD2F_reg(regF dst, regD src) %{ 10758 predicate(UseSSE>=2); 10759 match(Set dst (ConvD2F src)); 10760 format %{ "CVTSD2SS $dst,$src\t# F-round" %} 10761 ins_encode %{ 10762 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); 10763 %} 10764 ins_pipe( pipe_slow ); 10765 %} 10766 10767 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ 10768 predicate(UseSSE==0); 10769 match(Set dst (ConvF2D src)); 10770 format %{ "FST_S $dst,$src\t# D-round" %} 10771 ins_encode( Pop_Reg_Reg_DPR(dst, src)); 10772 ins_pipe( fpu_reg_reg ); 10773 %} 10774 10775 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ 10776 predicate(UseSSE==1); 10777 match(Set dst (ConvF2D src)); 10778 format %{ "FST_D 
$dst,$src\t# D-round" %} 10779 expand %{ 10780 roundDouble_mem_reg(dst,src); 10781 %} 10782 %} 10783 10784 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ 10785 predicate(UseSSE==1); 10786 match(Set dst (ConvF2D src)); 10787 effect( KILL cr ); 10788 format %{ "SUB ESP,4\n\t" 10789 "MOVSS [ESP] $src\n\t" 10790 "FLD_S [ESP]\n\t" 10791 "ADD ESP,4\n\t" 10792 "FSTP $dst\t# D-round" %} 10793 ins_encode %{ 10794 __ subptr(rsp, 4); 10795 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10796 __ fld_s(Address(rsp, 0)); 10797 __ addptr(rsp, 4); 10798 __ fstp_d($dst$$reg); 10799 %} 10800 ins_pipe( pipe_slow ); 10801 %} 10802 10803 instruct convF2D_reg(regD dst, regF src) %{ 10804 predicate(UseSSE>=2); 10805 match(Set dst (ConvF2D src)); 10806 format %{ "CVTSS2SD $dst,$src\t# D-round" %} 10807 ins_encode %{ 10808 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); 10809 %} 10810 ins_pipe( pipe_slow ); 10811 %} 10812 10813 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10814 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{ 10815 predicate(UseSSE<=1); 10816 match(Set dst (ConvD2I src)); 10817 effect( KILL tmp, KILL cr ); 10818 format %{ "FLD $src\t# Convert double to int \n\t" 10819 "FLDCW trunc mode\n\t" 10820 "SUB ESP,4\n\t" 10821 "FISTp [ESP + #0]\n\t" 10822 "FLDCW std/24-bit mode\n\t" 10823 "POP EAX\n\t" 10824 "CMP EAX,0x80000000\n\t" 10825 "JNE,s fast\n\t" 10826 "FLD_D $src\n\t" 10827 "CALL d2i_wrapper\n" 10828 "fast:" %} 10829 ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); 10830 ins_pipe( pipe_slow ); 10831 %} 10832 10833 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10834 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ 10835 predicate(UseSSE>=2); 10836 match(Set dst (ConvD2I src)); 10837 effect( KILL tmp, KILL cr ); 10838 format %{ "CVTTSD2SI $dst, $src\n\t" 10839 "CMP $dst,0x80000000\n\t" 10840 "JNE,s fast\n\t" 10841 "SUB ESP, 8\n\t" 10842 "MOVSD [ESP], $src\n\t" 10843 "FLD_D [ESP]\n\t" 10844 "ADD ESP, 8\n\t" 10845 "CALL d2i_wrapper\n" 10846 "fast:" %} 10847 ins_encode %{ 10848 Label fast; 10849 __ cvttsd2sil($dst$$Register, $src$$XMMRegister); 10850 __ cmpl($dst$$Register, 0x80000000); 10851 __ jccb(Assembler::notEqual, fast); 10852 __ subptr(rsp, 8); 10853 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10854 __ fld_d(Address(rsp, 0)); 10855 __ addptr(rsp, 8); 10856 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); 10857 __ bind(fast); 10858 %} 10859 ins_pipe( pipe_slow ); 10860 %} 10861 10862 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ 10863 predicate(UseSSE<=1); 10864 match(Set dst (ConvD2L src)); 10865 effect( KILL cr ); 10866 format %{ "FLD $src\t# Convert double to long\n\t" 10867 "FLDCW trunc mode\n\t" 10868 "SUB ESP,8\n\t" 10869 "FISTp [ESP + #0]\n\t" 10870 "FLDCW std/24-bit mode\n\t" 10871 "POP EAX\n\t" 10872 "POP EDX\n\t" 10873 "CMP EDX,0x80000000\n\t" 10874 "JNE,s fast\n\t" 10875 "TEST EAX,EAX\n\t" 10876 "JNE,s fast\n\t" 10877 "FLD $src\n\t" 10878 "CALL d2l_wrapper\n" 10879 "fast:" %} 10880 ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); 10881 ins_pipe( pipe_slow ); 10882 %} 10883 10884 // XMM lacks a float/double->long conversion, so use the old FPU stack. 
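// Java semantics for double->long, which the d2l_wrapper slow-path call
// provides:
//   NaN        -> 0
//   too large  -> Long.MAX_VALUE (0x7fffffffffffffff)
//   too small  -> Long.MIN_VALUE (0x8000000000000000)
// FISTP stores the "integer indefinite" value 0x8000000000000000 for NaN and
// out-of-range inputs, so the fast path below checks EDX:EAX against that
// sentinel; an exact result of -2^63 also matches and is simply re-checked
// by the wrapper.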
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a float to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned float down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
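//
// A rough sketch of the corner-case handling shared by the F2I/D2I forms
// (illustrative only; the actual fixups live in the d2i_wrapper stub):
// CVTTSS2SI/CVTTSD2SI and a truncating FISTP all produce the "integer
// indefinite" value 0x80000000 for NaN or out-of-range inputs, and the same
// bit pattern is also the legitimate answer for exactly -2^31.  Hence the
// fast path just compares against 0x80000000, and only a match falls into
// the wrapper, which applies the Java rules:
//
//   NaN              -> 0
//   value >=  2^31   -> Integer.MAX_VALUE
//   value <= -2^31   -> Integer.MIN_VALUE   (covers the exact -2^31 case)
//   otherwise        -> truncate toward zero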
10964 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ 10965 predicate(UseSSE>=1); 10966 match(Set dst (ConvF2I src)); 10967 effect( KILL tmp, KILL cr ); 10968 format %{ "CVTTSS2SI $dst, $src\n\t" 10969 "CMP $dst,0x80000000\n\t" 10970 "JNE,s fast\n\t" 10971 "SUB ESP, 4\n\t" 10972 "MOVSS [ESP], $src\n\t" 10973 "FLD [ESP]\n\t" 10974 "ADD ESP, 4\n\t" 10975 "CALL d2i_wrapper\n" 10976 "fast:" %} 10977 ins_encode %{ 10978 Label fast; 10979 __ cvttss2sil($dst$$Register, $src$$XMMRegister); 10980 __ cmpl($dst$$Register, 0x80000000); 10981 __ jccb(Assembler::notEqual, fast); 10982 __ subptr(rsp, 4); 10983 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10984 __ fld_s(Address(rsp, 0)); 10985 __ addptr(rsp, 4); 10986 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); 10987 __ bind(fast); 10988 %} 10989 ins_pipe( pipe_slow ); 10990 %} 10991 10992 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ 10993 predicate(UseSSE==0); 10994 match(Set dst (ConvF2L src)); 10995 effect( KILL cr ); 10996 format %{ "FLD $src\t# Convert float to long\n\t" 10997 "FLDCW trunc mode\n\t" 10998 "SUB ESP,8\n\t" 10999 "FISTp [ESP + #0]\n\t" 11000 "FLDCW std/24-bit mode\n\t" 11001 "POP EAX\n\t" 11002 "POP EDX\n\t" 11003 "CMP EDX,0x80000000\n\t" 11004 "JNE,s fast\n\t" 11005 "TEST EAX,EAX\n\t" 11006 "JNE,s fast\n\t" 11007 "FLD $src\n\t" 11008 "CALL d2l_wrapper\n" 11009 "fast:" %} 11010 // DPR2L_encoding works for FPR2L 11011 ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); 11012 ins_pipe( pipe_slow ); 11013 %} 11014 11015 // XMM lacks a float/double->long conversion, so use the old FPU stack. 11016 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ 11017 predicate (UseSSE>=1); 11018 match(Set dst (ConvF2L src)); 11019 effect( KILL cr ); 11020 format %{ "SUB ESP,8\t# Convert float to long\n\t" 11021 "MOVSS [ESP],$src\n\t" 11022 "FLD_S [ESP]\n\t" 11023 "FLDCW trunc mode\n\t" 11024 "FISTp [ESP + #0]\n\t" 11025 "FLDCW std/24-bit mode\n\t" 11026 "POP EAX\n\t" 11027 "POP EDX\n\t" 11028 "CMP EDX,0x80000000\n\t" 11029 "JNE,s fast\n\t" 11030 "TEST EAX,EAX\n\t" 11031 "JNE,s fast\n\t" 11032 "SUB ESP,4\t# Convert float to long\n\t" 11033 "MOVSS [ESP],$src\n\t" 11034 "FLD_S [ESP]\n\t" 11035 "ADD ESP,4\n\t" 11036 "CALL d2l_wrapper\n" 11037 "fast:" %} 11038 ins_encode %{ 11039 Label fast; 11040 __ subptr(rsp, 8); 11041 __ movflt(Address(rsp, 0), $src$$XMMRegister); 11042 __ fld_s(Address(rsp, 0)); 11043 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); 11044 __ fistp_d(Address(rsp, 0)); 11045 // Restore the rounding mode, mask the exception 11046 if (Compile::current()->in_24_bit_fp_mode()) { 11047 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 11048 } else { 11049 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 11050 } 11051 // Load the converted long, adjust CPU stack 11052 __ pop(rax); 11053 __ pop(rdx); 11054 __ cmpl(rdx, 0x80000000); 11055 __ jccb(Assembler::notEqual, fast); 11056 __ testl(rax, rax); 11057 __ jccb(Assembler::notEqual, fast); 11058 __ subptr(rsp, 4); 11059 __ movflt(Address(rsp, 0), $src$$XMMRegister); 11060 __ fld_s(Address(rsp, 0)); 11061 __ addptr(rsp, 4); 11062 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); 11063 __ bind(fast); 11064 %} 11065 ins_pipe( pipe_slow ); 11066 %} 11067 11068 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ 11069 predicate( UseSSE<=1 ); 11070 match(Set dst (ConvI2D src)); 11071 format %{ "FILD 
$src\n\t" 11072 "FSTP $dst" %} 11073 opcode(0xDB, 0x0); /* DB /0 */ 11074 ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); 11075 ins_pipe( fpu_reg_mem ); 11076 %} 11077 11078 instruct convI2D_reg(regD dst, rRegI src) %{ 11079 predicate( UseSSE>=2 && !UseXmmI2D ); 11080 match(Set dst (ConvI2D src)); 11081 format %{ "CVTSI2SD $dst,$src" %} 11082 ins_encode %{ 11083 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); 11084 %} 11085 ins_pipe( pipe_slow ); 11086 %} 11087 11088 instruct convI2D_mem(regD dst, memory mem) %{ 11089 predicate( UseSSE>=2 ); 11090 match(Set dst (ConvI2D (LoadI mem))); 11091 format %{ "CVTSI2SD $dst,$mem" %} 11092 ins_encode %{ 11093 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); 11094 %} 11095 ins_pipe( pipe_slow ); 11096 %} 11097 11098 instruct convXI2D_reg(regD dst, rRegI src) 11099 %{ 11100 predicate( UseSSE>=2 && UseXmmI2D ); 11101 match(Set dst (ConvI2D src)); 11102 11103 format %{ "MOVD $dst,$src\n\t" 11104 "CVTDQ2PD $dst,$dst\t# i2d" %} 11105 ins_encode %{ 11106 __ movdl($dst$$XMMRegister, $src$$Register); 11107 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); 11108 %} 11109 ins_pipe(pipe_slow); // XXX 11110 %} 11111 11112 instruct convI2DPR_mem(regDPR dst, memory mem) %{ 11113 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); 11114 match(Set dst (ConvI2D (LoadI mem))); 11115 format %{ "FILD $mem\n\t" 11116 "FSTP $dst" %} 11117 opcode(0xDB); /* DB /0 */ 11118 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11119 Pop_Reg_DPR(dst)); 11120 ins_pipe( fpu_reg_mem ); 11121 %} 11122 11123 // Convert a byte to a float; no rounding step needed. 11124 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{ 11125 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); 11126 match(Set dst (ConvI2F src)); 11127 format %{ "FILD $src\n\t" 11128 "FSTP $dst" %} 11129 11130 opcode(0xDB, 0x0); /* DB /0 */ 11131 ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); 11132 ins_pipe( fpu_reg_mem ); 11133 %} 11134 11135 // In 24-bit mode, force exponent rounding by storing back out 11136 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{ 11137 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11138 match(Set dst (ConvI2F src)); 11139 ins_cost(200); 11140 format %{ "FILD $src\n\t" 11141 "FSTP_S $dst" %} 11142 opcode(0xDB, 0x0); /* DB /0 */ 11143 ins_encode( Push_Mem_I(src), 11144 Pop_Mem_FPR(dst)); 11145 ins_pipe( fpu_mem_mem ); 11146 %} 11147 11148 // In 24-bit mode, force exponent rounding by storing back out 11149 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{ 11150 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11151 match(Set dst (ConvI2F (LoadI mem))); 11152 ins_cost(200); 11153 format %{ "FILD $mem\n\t" 11154 "FSTP_S $dst" %} 11155 opcode(0xDB); /* DB /0 */ 11156 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11157 Pop_Mem_FPR(dst)); 11158 ins_pipe( fpu_mem_mem ); 11159 %} 11160 11161 // This instruction does not round to 24-bits 11162 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{ 11163 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11164 match(Set dst (ConvI2F src)); 11165 format %{ "FILD $src\n\t" 11166 "FSTP $dst" %} 11167 opcode(0xDB, 0x0); /* DB /0 */ 11168 ins_encode( Push_Mem_I(src), 11169 Pop_Reg_FPR(dst)); 11170 ins_pipe( fpu_reg_mem ); 11171 %} 11172 11173 // This instruction does not round to 24-bits 11174 instruct convI2FPR_mem(regFPR dst, memory mem) %{ 11175 predicate( UseSSE==0 && 
!Compile::current()->select_24_bit_instr()); 11176 match(Set dst (ConvI2F (LoadI mem))); 11177 format %{ "FILD $mem\n\t" 11178 "FSTP $dst" %} 11179 opcode(0xDB); /* DB /0 */ 11180 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11181 Pop_Reg_FPR(dst)); 11182 ins_pipe( fpu_reg_mem ); 11183 %} 11184 11185 // Convert an int to a float in xmm; no rounding step needed. 11186 instruct convI2F_reg(regF dst, rRegI src) %{ 11187 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); 11188 match(Set dst (ConvI2F src)); 11189 format %{ "CVTSI2SS $dst, $src" %} 11190 ins_encode %{ 11191 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); 11192 %} 11193 ins_pipe( pipe_slow ); 11194 %} 11195 11196 instruct convXI2F_reg(regF dst, rRegI src) 11197 %{ 11198 predicate( UseSSE>=2 && UseXmmI2F ); 11199 match(Set dst (ConvI2F src)); 11200 11201 format %{ "MOVD $dst,$src\n\t" 11202 "CVTDQ2PS $dst,$dst\t# i2f" %} 11203 ins_encode %{ 11204 __ movdl($dst$$XMMRegister, $src$$Register); 11205 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); 11206 %} 11207 ins_pipe(pipe_slow); // XXX 11208 %} 11209 11210 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{ 11211 match(Set dst (ConvI2L src)); 11212 effect(KILL cr); 11213 ins_cost(375); 11214 format %{ "MOV $dst.lo,$src\n\t" 11215 "MOV $dst.hi,$src\n\t" 11216 "SAR $dst.hi,31" %} 11217 ins_encode(convert_int_long(dst,src)); 11218 ins_pipe( ialu_reg_reg_long ); 11219 %} 11220 11221 // Zero-extend convert int to long 11222 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{ 11223 match(Set dst (AndL (ConvI2L src) mask) ); 11224 effect( KILL flags ); 11225 ins_cost(250); 11226 format %{ "MOV $dst.lo,$src\n\t" 11227 "XOR $dst.hi,$dst.hi" %} 11228 opcode(0x33); // XOR 11229 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11230 ins_pipe( ialu_reg_reg_long ); 11231 %} 11232 11233 // Zero-extend long 11234 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{ 11235 match(Set dst (AndL src mask) ); 11236 effect( KILL flags ); 11237 ins_cost(250); 11238 format %{ "MOV $dst.lo,$src.lo\n\t" 11239 "XOR $dst.hi,$dst.hi\n\t" %} 11240 opcode(0x33); // XOR 11241 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11242 ins_pipe( ialu_reg_reg_long ); 11243 %} 11244 11245 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ 11246 predicate (UseSSE<=1); 11247 match(Set dst (ConvL2D src)); 11248 effect( KILL cr ); 11249 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11250 "PUSH $src.lo\n\t" 11251 "FILD ST,[ESP + #0]\n\t" 11252 "ADD ESP,8\n\t" 11253 "FSTP_D $dst\t# D-round" %} 11254 opcode(0xDF, 0x5); /* DF /5 */ 11255 ins_encode(convert_long_double(src), Pop_Mem_DPR(dst)); 11256 ins_pipe( pipe_slow ); 11257 %} 11258 11259 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{ 11260 predicate (UseSSE>=2); 11261 match(Set dst (ConvL2D src)); 11262 effect( KILL cr ); 11263 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11264 "PUSH $src.lo\n\t" 11265 "FILD_D [ESP]\n\t" 11266 "FSTP_D [ESP]\n\t" 11267 "MOVSD $dst,[ESP]\n\t" 11268 "ADD ESP,8" %} 11269 opcode(0xDF, 0x5); /* DF /5 */ 11270 ins_encode(convert_long_double2(src), Push_ResultD(dst)); 11271 ins_pipe( pipe_slow ); 11272 %} 11273 11274 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{ 11275 predicate (UseSSE>=1); 11276 match(Set dst (ConvL2F src)); 11277 effect( KILL cr ); 11278 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11279 "PUSH $src.lo\n\t" 11280 "FILD_D [ESP]\n\t" 11281 "FSTP_S [ESP]\n\t" 11282 
"MOVSS $dst,[ESP]\n\t" 11283 "ADD ESP,8" %} 11284 opcode(0xDF, 0x5); /* DF /5 */ 11285 ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8)); 11286 ins_pipe( pipe_slow ); 11287 %} 11288 11289 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ 11290 match(Set dst (ConvL2F src)); 11291 effect( KILL cr ); 11292 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11293 "PUSH $src.lo\n\t" 11294 "FILD ST,[ESP + #0]\n\t" 11295 "ADD ESP,8\n\t" 11296 "FSTP_S $dst\t# F-round" %} 11297 opcode(0xDF, 0x5); /* DF /5 */ 11298 ins_encode(convert_long_double(src), Pop_Mem_FPR(dst)); 11299 ins_pipe( pipe_slow ); 11300 %} 11301 11302 instruct convL2I_reg( rRegI dst, eRegL src ) %{ 11303 match(Set dst (ConvL2I src)); 11304 effect( DEF dst, USE src ); 11305 format %{ "MOV $dst,$src.lo" %} 11306 ins_encode(enc_CopyL_Lo(dst,src)); 11307 ins_pipe( ialu_reg_reg ); 11308 %} 11309 11310 11311 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{ 11312 match(Set dst (MoveF2I src)); 11313 effect( DEF dst, USE src ); 11314 ins_cost(100); 11315 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %} 11316 ins_encode %{ 11317 __ movl($dst$$Register, Address(rsp, $src$$disp)); 11318 %} 11319 ins_pipe( ialu_reg_mem ); 11320 %} 11321 11322 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{ 11323 predicate(UseSSE==0); 11324 match(Set dst (MoveF2I src)); 11325 effect( DEF dst, USE src ); 11326 11327 ins_cost(125); 11328 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %} 11329 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 11330 ins_pipe( fpu_mem_reg ); 11331 %} 11332 11333 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{ 11334 predicate(UseSSE>=1); 11335 match(Set dst (MoveF2I src)); 11336 effect( DEF dst, USE src ); 11337 11338 ins_cost(95); 11339 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %} 11340 ins_encode %{ 11341 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); 11342 %} 11343 ins_pipe( pipe_slow ); 11344 %} 11345 11346 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{ 11347 predicate(UseSSE>=2); 11348 match(Set dst (MoveF2I src)); 11349 effect( DEF dst, USE src ); 11350 ins_cost(85); 11351 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} 11352 ins_encode %{ 11353 __ movdl($dst$$Register, $src$$XMMRegister); 11354 %} 11355 ins_pipe( pipe_slow ); 11356 %} 11357 11358 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ 11359 match(Set dst (MoveI2F src)); 11360 effect( DEF dst, USE src ); 11361 11362 ins_cost(100); 11363 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} 11364 ins_encode %{ 11365 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11366 %} 11367 ins_pipe( ialu_mem_reg ); 11368 %} 11369 11370 11371 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11372 predicate(UseSSE==0); 11373 match(Set dst (MoveI2F src)); 11374 effect(DEF dst, USE src); 11375 11376 ins_cost(125); 11377 format %{ "FLD_S $src\n\t" 11378 "FSTP $dst\t# MoveI2F_stack_reg" %} 11379 opcode(0xD9); /* D9 /0, FLD m32real */ 11380 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11381 Pop_Reg_FPR(dst) ); 11382 ins_pipe( fpu_reg_mem ); 11383 %} 11384 11385 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11386 predicate(UseSSE>=1); 11387 match(Set dst (MoveI2F src)); 11388 effect( DEF dst, USE src ); 11389 11390 ins_cost(95); 11391 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11392 ins_encode %{ 11393 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11394 %} 11395 ins_pipe( pipe_slow ); 11396 %} 11397 11398 instruct MoveI2F_reg_reg_sse(regF 
dst, rRegI src) %{ 11399 predicate(UseSSE>=2); 11400 match(Set dst (MoveI2F src)); 11401 effect( DEF dst, USE src ); 11402 11403 ins_cost(85); 11404 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11405 ins_encode %{ 11406 __ movdl($dst$$XMMRegister, $src$$Register); 11407 %} 11408 ins_pipe( pipe_slow ); 11409 %} 11410 11411 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11412 match(Set dst (MoveD2L src)); 11413 effect(DEF dst, USE src); 11414 11415 ins_cost(250); 11416 format %{ "MOV $dst.lo,$src\n\t" 11417 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11418 opcode(0x8B, 0x8B); 11419 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); 11420 ins_pipe( ialu_mem_long_reg ); 11421 %} 11422 11423 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11424 predicate(UseSSE<=1); 11425 match(Set dst (MoveD2L src)); 11426 effect(DEF dst, USE src); 11427 11428 ins_cost(125); 11429 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11430 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11431 ins_pipe( fpu_mem_reg ); 11432 %} 11433 11434 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11435 predicate(UseSSE>=2); 11436 match(Set dst (MoveD2L src)); 11437 effect(DEF dst, USE src); 11438 ins_cost(95); 11439 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11440 ins_encode %{ 11441 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11442 %} 11443 ins_pipe( pipe_slow ); 11444 %} 11445 11446 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11447 predicate(UseSSE>=2); 11448 match(Set dst (MoveD2L src)); 11449 effect(DEF dst, USE src, TEMP tmp); 11450 ins_cost(85); 11451 format %{ "MOVD $dst.lo,$src\n\t" 11452 "PSHUFLW $tmp,$src,0x4E\n\t" 11453 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11454 ins_encode %{ 11455 __ movdl($dst$$Register, $src$$XMMRegister); 11456 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11457 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11458 %} 11459 ins_pipe( pipe_slow ); 11460 %} 11461 11462 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11463 match(Set dst (MoveL2D src)); 11464 effect(DEF dst, USE src); 11465 11466 ins_cost(200); 11467 format %{ "MOV $dst,$src.lo\n\t" 11468 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11469 opcode(0x89, 0x89); 11470 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 11471 ins_pipe( ialu_mem_long_reg ); 11472 %} 11473 11474 11475 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11476 predicate(UseSSE<=1); 11477 match(Set dst (MoveL2D src)); 11478 effect(DEF dst, USE src); 11479 ins_cost(125); 11480 11481 format %{ "FLD_D $src\n\t" 11482 "FSTP $dst\t# MoveL2D_stack_reg" %} 11483 opcode(0xDD); /* DD /0, FLD m64real */ 11484 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11485 Pop_Reg_DPR(dst) ); 11486 ins_pipe( fpu_reg_mem ); 11487 %} 11488 11489 11490 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11491 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 11492 match(Set dst (MoveL2D src)); 11493 effect(DEF dst, USE src); 11494 11495 ins_cost(95); 11496 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11497 ins_encode %{ 11498 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11499 %} 11500 ins_pipe( pipe_slow ); 11501 %} 11502 11503 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11504 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11505 match(Set dst (MoveL2D src)); 11506 effect(DEF dst, USE src); 11507 11508 ins_cost(95); 11509 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" 
%} 11510 ins_encode %{ 11511 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11512 %} 11513 ins_pipe( pipe_slow ); 11514 %} 11515 11516 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11517 predicate(UseSSE>=2); 11518 match(Set dst (MoveL2D src)); 11519 effect(TEMP dst, USE src, TEMP tmp); 11520 ins_cost(85); 11521 format %{ "MOVD $dst,$src.lo\n\t" 11522 "MOVD $tmp,$src.hi\n\t" 11523 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11524 ins_encode %{ 11525 __ movdl($dst$$XMMRegister, $src$$Register); 11526 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11527 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11528 %} 11529 ins_pipe( pipe_slow ); 11530 %} 11531 11532 11533 // ======================================================================= 11534 // fast clearing of an array 11535 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11536 predicate(!UseFastStosb); 11537 match(Set dummy (ClearArray cnt base)); 11538 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11539 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11540 "SHL ECX,1\t# Convert doublewords to words\n\t" 11541 "REP STOS\t# store EAX into [EDI++] while ECX--" %} 11542 ins_encode %{ 11543 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11544 %} 11545 ins_pipe( pipe_slow ); 11546 %} 11547 11548 instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11549 predicate(UseFastStosb); 11550 match(Set dummy (ClearArray cnt base)); 11551 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11552 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11553 "SHL ECX,3\t# Convert doublewords to bytes\n\t" 11554 "REP STOSB\t# store EAX into [EDI++] while ECX--" %} 11555 ins_encode %{ 11556 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11557 %} 11558 ins_pipe( pipe_slow ); 11559 %} 11560 11561 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11562 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11563 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11564 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11565 11566 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11567 ins_encode %{ 11568 __ string_compare($str1$$Register, $str2$$Register, 11569 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11570 $tmp1$$XMMRegister); 11571 %} 11572 ins_pipe( pipe_slow ); 11573 %} 11574 11575 // fast string equals 11576 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11577 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11578 match(Set result (StrEquals (Binary str1 str2) cnt)); 11579 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11580 11581 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11582 ins_encode %{ 11583 __ char_arrays_equals(false, $str1$$Register, $str2$$Register, 11584 $cnt$$Register, $result$$Register, $tmp3$$Register, 11585 $tmp1$$XMMRegister, $tmp2$$XMMRegister); 11586 %} 11587 ins_pipe( pipe_slow ); 11588 %} 11589 11590 // fast search of substring with known size. 
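//
// Rough background for the two IndexOf flavors below (a sketch of the
// rationale, not a new rule): PCMPESTRI works on 128-bit operands, i.e. up
// to 8 UTF-16 chars per step.  With a constant needle of at least 8 chars,
// string_indexofC8 can load a full 16-byte chunk of the needle directly;
// shorter or variable-length needles go through string_indexof, which, as
// the encoding notes, may stage small strings through the stack when they
// would otherwise be read across a page boundary.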
11591 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11592 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11593 predicate(UseSSE42Intrinsics); 11594 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11595 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11596 11597 format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11598 ins_encode %{ 11599 int icnt2 = (int)$int_cnt2$$constant; 11600 if (icnt2 >= 8) { 11601 // IndexOf for constant substrings with size >= 8 elements 11602 // which don't need to be loaded through stack. 11603 __ string_indexofC8($str1$$Register, $str2$$Register, 11604 $cnt1$$Register, $cnt2$$Register, 11605 icnt2, $result$$Register, 11606 $vec$$XMMRegister, $tmp$$Register); 11607 } else { 11608 // Small strings are loaded through stack if they cross page boundary. 11609 __ string_indexof($str1$$Register, $str2$$Register, 11610 $cnt1$$Register, $cnt2$$Register, 11611 icnt2, $result$$Register, 11612 $vec$$XMMRegister, $tmp$$Register); 11613 } 11614 %} 11615 ins_pipe( pipe_slow ); 11616 %} 11617 11618 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11619 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11620 predicate(UseSSE42Intrinsics); 11621 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11622 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11623 11624 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11625 ins_encode %{ 11626 __ string_indexof($str1$$Register, $str2$$Register, 11627 $cnt1$$Register, $cnt2$$Register, 11628 (-1), $result$$Register, 11629 $vec$$XMMRegister, $tmp$$Register); 11630 %} 11631 ins_pipe( pipe_slow ); 11632 %} 11633 11634 // fast array equals 11635 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11636 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11637 %{ 11638 match(Set result (AryEq ary1 ary2)); 11639 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11640 //ins_cost(300); 11641 11642 format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11643 ins_encode %{ 11644 __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register, 11645 $tmp3$$Register, $result$$Register, $tmp4$$Register, 11646 $tmp1$$XMMRegister, $tmp2$$XMMRegister); 11647 %} 11648 ins_pipe( pipe_slow ); 11649 %} 11650 11651 // encode char[] to byte[] in ISO_8859_1 11652 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, 11653 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11654 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11655 match(Set result (EncodeISOArray src (Binary dst len))); 11656 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11657 11658 format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 11659 ins_encode %{ 11660 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 11661 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11662 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11663 %} 11664 ins_pipe( pipe_slow ); 11665 %} 11666 11667 11668 //----------Control Flow Instructions------------------------------------------ 11669 // Signed compare Instructions 11670 instruct compI_eReg(eFlagsReg 
cr, rRegI op1, rRegI op2) %{ 11671 match(Set cr (CmpI op1 op2)); 11672 effect( DEF cr, USE op1, USE op2 ); 11673 format %{ "CMP $op1,$op2" %} 11674 opcode(0x3B); /* Opcode 3B /r */ 11675 ins_encode( OpcP, RegReg( op1, op2) ); 11676 ins_pipe( ialu_cr_reg_reg ); 11677 %} 11678 11679 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ 11680 match(Set cr (CmpI op1 op2)); 11681 effect( DEF cr, USE op1 ); 11682 format %{ "CMP $op1,$op2" %} 11683 opcode(0x81,0x07); /* Opcode 81 /7 */ 11684 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ 11685 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11686 ins_pipe( ialu_cr_reg_imm ); 11687 %} 11688 11689 // Cisc-spilled version of cmpI_eReg 11690 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ 11691 match(Set cr (CmpI op1 (LoadI op2))); 11692 11693 format %{ "CMP $op1,$op2" %} 11694 ins_cost(500); 11695 opcode(0x3B); /* Opcode 3B /r */ 11696 ins_encode( OpcP, RegMem( op1, op2) ); 11697 ins_pipe( ialu_cr_reg_mem ); 11698 %} 11699 11700 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{ 11701 match(Set cr (CmpI src zero)); 11702 effect( DEF cr, USE src ); 11703 11704 format %{ "TEST $src,$src" %} 11705 opcode(0x85); 11706 ins_encode( OpcP, RegReg( src, src ) ); 11707 ins_pipe( ialu_cr_reg_imm ); 11708 %} 11709 11710 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{ 11711 match(Set cr (CmpI (AndI src con) zero)); 11712 11713 format %{ "TEST $src,$con" %} 11714 opcode(0xF7,0x00); 11715 ins_encode( OpcP, RegOpc(src), Con32(con) ); 11716 ins_pipe( ialu_cr_reg_imm ); 11717 %} 11718 11719 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{ 11720 match(Set cr (CmpI (AndI src mem) zero)); 11721 11722 format %{ "TEST $src,$mem" %} 11723 opcode(0x85); 11724 ins_encode( OpcP, RegMem( src, mem ) ); 11725 ins_pipe( ialu_cr_reg_mem ); 11726 %} 11727 11728 // Unsigned compare Instructions; really, same as signed except they 11729 // produce an eFlagsRegU instead of eFlagsReg. 
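//
// The unsigned compares below emit the same CMP opcodes as the signed ones
// above; only the flags register class differs, which is what later steers
// selection toward JB/JA-style branches instead of JL/JG.  A small
// illustration (values chosen for the example only):
//
//   CMP EAX,1           ; with EAX = 0xFFFFFFFF
//   JL  somewhere       ; taken:     -1 < 1 in the signed view
//   JB  somewhere       ; not taken: 0xFFFFFFFF is above 1 unsigned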
11730 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{ 11731 match(Set cr (CmpU op1 op2)); 11732 11733 format %{ "CMPu $op1,$op2" %} 11734 opcode(0x3B); /* Opcode 3B /r */ 11735 ins_encode( OpcP, RegReg( op1, op2) ); 11736 ins_pipe( ialu_cr_reg_reg ); 11737 %} 11738 11739 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{ 11740 match(Set cr (CmpU op1 op2)); 11741 11742 format %{ "CMPu $op1,$op2" %} 11743 opcode(0x81,0x07); /* Opcode 81 /7 */ 11744 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11745 ins_pipe( ialu_cr_reg_imm ); 11746 %} 11747 11748 // // Cisc-spilled version of cmpU_eReg 11749 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{ 11750 match(Set cr (CmpU op1 (LoadI op2))); 11751 11752 format %{ "CMPu $op1,$op2" %} 11753 ins_cost(500); 11754 opcode(0x3B); /* Opcode 3B /r */ 11755 ins_encode( OpcP, RegMem( op1, op2) ); 11756 ins_pipe( ialu_cr_reg_mem ); 11757 %} 11758 11759 // // Cisc-spilled version of cmpU_eReg 11760 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{ 11761 // match(Set cr (CmpU (LoadI op1) op2)); 11762 // 11763 // format %{ "CMPu $op1,$op2" %} 11764 // ins_cost(500); 11765 // opcode(0x39); /* Opcode 39 /r */ 11766 // ins_encode( OpcP, RegMem( op1, op2) ); 11767 //%} 11768 11769 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{ 11770 match(Set cr (CmpU src zero)); 11771 11772 format %{ "TESTu $src,$src" %} 11773 opcode(0x85); 11774 ins_encode( OpcP, RegReg( src, src ) ); 11775 ins_pipe( ialu_cr_reg_imm ); 11776 %} 11777 11778 // Unsigned pointer compare Instructions 11779 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{ 11780 match(Set cr (CmpP op1 op2)); 11781 11782 format %{ "CMPu $op1,$op2" %} 11783 opcode(0x3B); /* Opcode 3B /r */ 11784 ins_encode( OpcP, RegReg( op1, op2) ); 11785 ins_pipe( ialu_cr_reg_reg ); 11786 %} 11787 11788 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{ 11789 match(Set cr (CmpP op1 op2)); 11790 11791 format %{ "CMPu $op1,$op2" %} 11792 opcode(0x81,0x07); /* Opcode 81 /7 */ 11793 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11794 ins_pipe( ialu_cr_reg_imm ); 11795 %} 11796 11797 // // Cisc-spilled version of cmpP_eReg 11798 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{ 11799 match(Set cr (CmpP op1 (LoadP op2))); 11800 11801 format %{ "CMPu $op1,$op2" %} 11802 ins_cost(500); 11803 opcode(0x3B); /* Opcode 3B /r */ 11804 ins_encode( OpcP, RegMem( op1, op2) ); 11805 ins_pipe( ialu_cr_reg_mem ); 11806 %} 11807 11808 // // Cisc-spilled version of cmpP_eReg 11809 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{ 11810 // match(Set cr (CmpP (LoadP op1) op2)); 11811 // 11812 // format %{ "CMPu $op1,$op2" %} 11813 // ins_cost(500); 11814 // opcode(0x39); /* Opcode 39 /r */ 11815 // ins_encode( OpcP, RegMem( op1, op2) ); 11816 //%} 11817 11818 // Compare raw pointer (used in out-of-heap check). 11819 // Only works because non-oop pointers must be raw pointers 11820 // and raw pointers have no anti-dependencies. 11821 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{ 11822 predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none ); 11823 match(Set cr (CmpP op1 (LoadP op2))); 11824 11825 format %{ "CMPu $op1,$op2" %} 11826 opcode(0x3B); /* Opcode 3B /r */ 11827 ins_encode( OpcP, RegMem( op1, op2) ); 11828 ins_pipe( ialu_cr_reg_mem ); 11829 %} 11830 11831 // 11832 // This will generate a signed flags result. This should be ok 11833 // since any compare to a zero should be eq/neq. 
11834 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{ 11835 match(Set cr (CmpP src zero)); 11836 11837 format %{ "TEST $src,$src" %} 11838 opcode(0x85); 11839 ins_encode( OpcP, RegReg( src, src ) ); 11840 ins_pipe( ialu_cr_reg_imm ); 11841 %} 11842 11843 // Cisc-spilled version of testP_reg 11844 // This will generate a signed flags result. This should be ok 11845 // since any compare to a zero should be eq/neq. 11846 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{ 11847 match(Set cr (CmpP (LoadP op) zero)); 11848 11849 format %{ "TEST $op,0xFFFFFFFF" %} 11850 ins_cost(500); 11851 opcode(0xF7); /* Opcode F7 /0 */ 11852 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) ); 11853 ins_pipe( ialu_cr_reg_imm ); 11854 %} 11855 11856 // Yanked all unsigned pointer compare operations. 11857 // Pointer compares are done with CmpP which is already unsigned. 11858 11859 //----------Max and Min-------------------------------------------------------- 11860 // Min Instructions 11861 //// 11862 // *** Min and Max using the conditional move are slower than the 11863 // *** branch version on a Pentium III. 11864 // // Conditional move for min 11865 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11866 // effect( USE_DEF op2, USE op1, USE cr ); 11867 // format %{ "CMOVlt $op2,$op1\t! min" %} 11868 // opcode(0x4C,0x0F); 11869 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11870 // ins_pipe( pipe_cmov_reg ); 11871 //%} 11872 // 11873 //// Min Register with Register (P6 version) 11874 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ 11875 // predicate(VM_Version::supports_cmov() ); 11876 // match(Set op2 (MinI op1 op2)); 11877 // ins_cost(200); 11878 // expand %{ 11879 // eFlagsReg cr; 11880 // compI_eReg(cr,op1,op2); 11881 // cmovI_reg_lt(op2,op1,cr); 11882 // %} 11883 //%} 11884 11885 // Min Register with Register (generic version) 11886 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 11887 match(Set dst (MinI dst src)); 11888 effect(KILL flags); 11889 ins_cost(300); 11890 11891 format %{ "MIN $dst,$src" %} 11892 opcode(0xCC); 11893 ins_encode( min_enc(dst,src) ); 11894 ins_pipe( pipe_slow ); 11895 %} 11896 11897 // Max Register with Register 11898 // *** Min and Max using the conditional move are slower than the 11899 // *** branch version on a Pentium III. 11900 // // Conditional move for max 11901 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11902 // effect( USE_DEF op2, USE op1, USE cr ); 11903 // format %{ "CMOVgt $op2,$op1\t! 
max" %} 11904 // opcode(0x4F,0x0F); 11905 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11906 // ins_pipe( pipe_cmov_reg ); 11907 //%} 11908 // 11909 // // Max Register with Register (P6 version) 11910 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ 11911 // predicate(VM_Version::supports_cmov() ); 11912 // match(Set op2 (MaxI op1 op2)); 11913 // ins_cost(200); 11914 // expand %{ 11915 // eFlagsReg cr; 11916 // compI_eReg(cr,op1,op2); 11917 // cmovI_reg_gt(op2,op1,cr); 11918 // %} 11919 //%} 11920 11921 // Max Register with Register (generic version) 11922 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 11923 match(Set dst (MaxI dst src)); 11924 effect(KILL flags); 11925 ins_cost(300); 11926 11927 format %{ "MAX $dst,$src" %} 11928 opcode(0xCC); 11929 ins_encode( max_enc(dst,src) ); 11930 ins_pipe( pipe_slow ); 11931 %} 11932 11933 // ============================================================================ 11934 // Counted Loop limit node which represents exact final iterator value. 11935 // Note: the resulting value should fit into integer range since 11936 // counted loops have limit check on overflow. 11937 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{ 11938 match(Set limit (LoopLimit (Binary init limit) stride)); 11939 effect(TEMP limit_hi, TEMP tmp, KILL flags); 11940 ins_cost(300); 11941 11942 format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %} 11943 ins_encode %{ 11944 int strd = (int)$stride$$constant; 11945 assert(strd != 1 && strd != -1, "sanity"); 11946 int m1 = (strd > 0) ? 1 : -1; 11947 // Convert limit to long (EAX:EDX) 11948 __ cdql(); 11949 // Convert init to long (init:tmp) 11950 __ movl($tmp$$Register, $init$$Register); 11951 __ sarl($tmp$$Register, 31); 11952 // $limit - $init 11953 __ subl($limit$$Register, $init$$Register); 11954 __ sbbl($limit_hi$$Register, $tmp$$Register); 11955 // + ($stride - 1) 11956 if (strd > 0) { 11957 __ addl($limit$$Register, (strd - 1)); 11958 __ adcl($limit_hi$$Register, 0); 11959 __ movl($tmp$$Register, strd); 11960 } else { 11961 __ addl($limit$$Register, (strd + 1)); 11962 __ adcl($limit_hi$$Register, -1); 11963 __ lneg($limit_hi$$Register, $limit$$Register); 11964 __ movl($tmp$$Register, -strd); 11965 } 11966 // signed devision: (EAX:EDX) / pos_stride 11967 __ idivl($tmp$$Register); 11968 if (strd < 0) { 11969 // restore sign 11970 __ negl($tmp$$Register); 11971 } 11972 // (EAX) * stride 11973 __ mull($tmp$$Register); 11974 // + init (ignore upper bits) 11975 __ addl($limit$$Register, $init$$Register); 11976 %} 11977 ins_pipe( pipe_slow ); 11978 %} 11979 11980 // ============================================================================ 11981 // Branch Instructions 11982 // Jump Table 11983 instruct jumpXtnd(rRegI switch_val) %{ 11984 match(Jump switch_val); 11985 ins_cost(350); 11986 format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %} 11987 ins_encode %{ 11988 // Jump to Address(table_base + switch_reg) 11989 Address index(noreg, $switch_val$$Register, Address::times_1); 11990 __ jump(ArrayAddress($constantaddress, index)); 11991 %} 11992 ins_pipe(pipe_jmp); 11993 %} 11994 11995 // Jump Direct - Label defines a relative address from JMP+1 11996 instruct jmpDir(label labl) %{ 11997 match(Goto); 11998 effect(USE labl); 11999 12000 ins_cost(300); 12001 format %{ "JMP $labl" %} 12002 size(5); 12003 ins_encode %{ 12004 Label* L = $labl$$label; 12005 __ jmp(*L, 
false); // Always long jump 12006 %} 12007 ins_pipe( pipe_jmp ); 12008 %} 12009 12010 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12011 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{ 12012 match(If cop cr); 12013 effect(USE labl); 12014 12015 ins_cost(300); 12016 format %{ "J$cop $labl" %} 12017 size(6); 12018 ins_encode %{ 12019 Label* L = $labl$$label; 12020 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12021 %} 12022 ins_pipe( pipe_jcc ); 12023 %} 12024 12025 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12026 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{ 12027 match(CountedLoopEnd cop cr); 12028 effect(USE labl); 12029 12030 ins_cost(300); 12031 format %{ "J$cop $labl\t# Loop end" %} 12032 size(6); 12033 ins_encode %{ 12034 Label* L = $labl$$label; 12035 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12036 %} 12037 ins_pipe( pipe_jcc ); 12038 %} 12039 12040 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12041 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12042 match(CountedLoopEnd cop cmp); 12043 effect(USE labl); 12044 12045 ins_cost(300); 12046 format %{ "J$cop,u $labl\t# Loop end" %} 12047 size(6); 12048 ins_encode %{ 12049 Label* L = $labl$$label; 12050 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12051 %} 12052 ins_pipe( pipe_jcc ); 12053 %} 12054 12055 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12056 match(CountedLoopEnd cop cmp); 12057 effect(USE labl); 12058 12059 ins_cost(200); 12060 format %{ "J$cop,u $labl\t# Loop end" %} 12061 size(6); 12062 ins_encode %{ 12063 Label* L = $labl$$label; 12064 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12065 %} 12066 ins_pipe( pipe_jcc ); 12067 %} 12068 12069 // Jump Direct Conditional - using unsigned comparison 12070 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12071 match(If cop cmp); 12072 effect(USE labl); 12073 12074 ins_cost(300); 12075 format %{ "J$cop,u $labl" %} 12076 size(6); 12077 ins_encode %{ 12078 Label* L = $labl$$label; 12079 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12080 %} 12081 ins_pipe(pipe_jcc); 12082 %} 12083 12084 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12085 match(If cop cmp); 12086 effect(USE labl); 12087 12088 ins_cost(200); 12089 format %{ "J$cop,u $labl" %} 12090 size(6); 12091 ins_encode %{ 12092 Label* L = $labl$$label; 12093 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12094 %} 12095 ins_pipe(pipe_jcc); 12096 %} 12097 12098 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 12099 match(If cop cmp); 12100 effect(USE labl); 12101 12102 ins_cost(200); 12103 format %{ $$template 12104 if ($cop$$cmpcode == Assembler::notEqual) { 12105 $$emit$$"JP,u $labl\n\t" 12106 $$emit$$"J$cop,u $labl" 12107 } else { 12108 $$emit$$"JP,u done\n\t" 12109 $$emit$$"J$cop,u $labl\n\t" 12110 $$emit$$"done:" 12111 } 12112 %} 12113 ins_encode %{ 12114 Label* l = $labl$$label; 12115 if ($cop$$cmpcode == Assembler::notEqual) { 12116 __ jcc(Assembler::parity, *l, false); 12117 __ jcc(Assembler::notEqual, *l, false); 12118 } else if ($cop$$cmpcode == Assembler::equal) { 12119 Label done; 12120 __ jccb(Assembler::parity, done); 12121 __ jcc(Assembler::equal, *l, false); 12122 __ bind(done); 12123 } else { 12124 ShouldNotReachHere(); 12125 } 
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
// array for an instance of the superklass.  Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()).  Return
// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset.  These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching.  Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.
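//
// For reference (a sketch of the IA-32 encodings, matching the size()
// attributes used here): a short Jcc is 2 bytes (0x7x + rel8) and a short
// JMP is 2 bytes (0xEB + rel8), while the long forms are 6 bytes
// (0x0F 0x8x + rel32) and 5 bytes (0xE9 + rel32).  A rel8 displacement only
// reaches -128..+127 bytes from the end of the instruction, which is the
// property is_short_branch_offset() has to establish before the long form
// is swapped for the short one.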
12183 12184 // Jump Direct - Label defines a relative address from JMP+1 12185 instruct jmpDir_short(label labl) %{ 12186 match(Goto); 12187 effect(USE labl); 12188 12189 ins_cost(300); 12190 format %{ "JMP,s $labl" %} 12191 size(2); 12192 ins_encode %{ 12193 Label* L = $labl$$label; 12194 __ jmpb(*L); 12195 %} 12196 ins_pipe( pipe_jmp ); 12197 ins_short_branch(1); 12198 %} 12199 12200 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12201 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{ 12202 match(If cop cr); 12203 effect(USE labl); 12204 12205 ins_cost(300); 12206 format %{ "J$cop,s $labl" %} 12207 size(2); 12208 ins_encode %{ 12209 Label* L = $labl$$label; 12210 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12211 %} 12212 ins_pipe( pipe_jcc ); 12213 ins_short_branch(1); 12214 %} 12215 12216 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12217 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{ 12218 match(CountedLoopEnd cop cr); 12219 effect(USE labl); 12220 12221 ins_cost(300); 12222 format %{ "J$cop,s $labl\t# Loop end" %} 12223 size(2); 12224 ins_encode %{ 12225 Label* L = $labl$$label; 12226 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12227 %} 12228 ins_pipe( pipe_jcc ); 12229 ins_short_branch(1); 12230 %} 12231 12232 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12233 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12234 match(CountedLoopEnd cop cmp); 12235 effect(USE labl); 12236 12237 ins_cost(300); 12238 format %{ "J$cop,us $labl\t# Loop end" %} 12239 size(2); 12240 ins_encode %{ 12241 Label* L = $labl$$label; 12242 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12243 %} 12244 ins_pipe( pipe_jcc ); 12245 ins_short_branch(1); 12246 %} 12247 12248 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12249 match(CountedLoopEnd cop cmp); 12250 effect(USE labl); 12251 12252 ins_cost(300); 12253 format %{ "J$cop,us $labl\t# Loop end" %} 12254 size(2); 12255 ins_encode %{ 12256 Label* L = $labl$$label; 12257 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12258 %} 12259 ins_pipe( pipe_jcc ); 12260 ins_short_branch(1); 12261 %} 12262 12263 // Jump Direct Conditional - using unsigned comparison 12264 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12265 match(If cop cmp); 12266 effect(USE labl); 12267 12268 ins_cost(300); 12269 format %{ "J$cop,us $labl" %} 12270 size(2); 12271 ins_encode %{ 12272 Label* L = $labl$$label; 12273 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12274 %} 12275 ins_pipe( pipe_jcc ); 12276 ins_short_branch(1); 12277 %} 12278 12279 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12280 match(If cop cmp); 12281 effect(USE labl); 12282 12283 ins_cost(300); 12284 format %{ "J$cop,us $labl" %} 12285 size(2); 12286 ins_encode %{ 12287 Label* L = $labl$$label; 12288 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12289 %} 12290 ins_pipe( pipe_jcc ); 12291 ins_short_branch(1); 12292 %} 12293 12294 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 12295 match(If cop cmp); 12296 effect(USE labl); 12297 12298 ins_cost(300); 12299 format %{ $$template 12300 if ($cop$$cmpcode == Assembler::notEqual) { 12301 $$emit$$"JP,u,s $labl\n\t" 12302 $$emit$$"J$cop,u,s $labl" 12303 } else { 12304 $$emit$$"JP,u,s done\n\t" 12305 $$emit$$"J$cop,u,s $labl\n\t" 12306 $$emit$$"done:" 12307 } 12308 %} 12309 size(4); 12310 ins_encode %{ 12311 Label* 
l = $labl$$label; 12312 if ($cop$$cmpcode == Assembler::notEqual) { 12313 __ jccb(Assembler::parity, *l); 12314 __ jccb(Assembler::notEqual, *l); 12315 } else if ($cop$$cmpcode == Assembler::equal) { 12316 Label done; 12317 __ jccb(Assembler::parity, done); 12318 __ jccb(Assembler::equal, *l); 12319 __ bind(done); 12320 } else { 12321 ShouldNotReachHere(); 12322 } 12323 %} 12324 ins_pipe(pipe_jcc); 12325 ins_short_branch(1); 12326 %} 12327 12328 // ============================================================================ 12329 // Long Compare 12330 // 12331 // Currently we hold longs in 2 registers. Comparing such values efficiently 12332 // is tricky. The flavor of compare used depends on whether we are testing 12333 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit. 12334 // The GE test is the negated LT test. The LE test can be had by commuting 12335 // the operands (yielding a GE test) and then negating; negate again for the 12336 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the 12337 // NE test is negated from that. 12338 12339 // Due to a shortcoming in the ADLC, it mixes up expressions like: 12340 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the 12341 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections 12342 // are collapsed internally in the ADLC's dfa-gen code. The match for 12343 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the 12344 // foo match ends up with the wrong leaf. One fix is to not match both 12345 // reg-reg and reg-zero forms of long-compare. This is unfortunate because 12346 // both forms beat the trinary form of long-compare and both are very useful 12347 // on Intel which has so few registers. 12348 12349 // Manifest a CmpL result in an integer register. Very painful. 12350 // This is the test to avoid. 12351 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{ 12352 match(Set dst (CmpL3 src1 src2)); 12353 effect( KILL flags ); 12354 ins_cost(1000); 12355 format %{ "XOR $dst,$dst\n\t" 12356 "CMP $src1.hi,$src2.hi\n\t" 12357 "JLT,s m_one\n\t" 12358 "JGT,s p_one\n\t" 12359 "CMP $src1.lo,$src2.lo\n\t" 12360 "JB,s m_one\n\t" 12361 "JEQ,s done\n" 12362 "p_one:\tINC $dst\n\t" 12363 "JMP,s done\n" 12364 "m_one:\tDEC $dst\n" 12365 "done:" %} 12366 ins_encode %{ 12367 Label p_one, m_one, done; 12368 __ xorptr($dst$$Register, $dst$$Register); 12369 __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register)); 12370 __ jccb(Assembler::less, m_one); 12371 __ jccb(Assembler::greater, p_one); 12372 __ cmpl($src1$$Register, $src2$$Register); 12373 __ jccb(Assembler::below, m_one); 12374 __ jccb(Assembler::equal, done); 12375 __ bind(p_one); 12376 __ incrementl($dst$$Register); 12377 __ jmpb(done); 12378 __ bind(m_one); 12379 __ decrementl($dst$$Register); 12380 __ bind(done); 12381 %} 12382 ins_pipe( pipe_slow ); 12383 %} 12384 12385 //====== 12386 // Manifest a CmpL result in the normal flags. Only good for LT or GE 12387 // compares. Can be used for LE or GT compares by reversing arguments. 12388 // NOT GOOD FOR EQ/NE tests. 12389 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{ 12390 match( Set flags (CmpL src zero )); 12391 ins_cost(100); 12392 format %{ "TEST $src.hi,$src.hi" %} 12393 opcode(0x85); 12394 ins_encode( OpcP, RegReg_Hi2( src, src ) ); 12395 ins_pipe( ialu_cr_reg_reg ); 12396 %} 12397 12398 // Manifest a CmpL result in the normal flags. 
Only good for LT or GE 12399 // compares. Can be used for LE or GT compares by reversing arguments. 12400 // NOT GOOD FOR EQ/NE tests. 12401 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{ 12402 match( Set flags (CmpL src1 src2 )); 12403 effect( TEMP tmp ); 12404 ins_cost(300); 12405 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" 12406 "MOV $tmp,$src1.hi\n\t" 12407 "SBB $tmp,$src2.hi\t! Compute flags for long compare" %} 12408 ins_encode( long_cmp_flags2( src1, src2, tmp ) ); 12409 ins_pipe( ialu_cr_reg_reg ); 12410 %} 12411 12412 // Long compares reg < zero/req OR reg >= zero/req. 12413 // Just a wrapper for a normal branch, plus the predicate test. 12414 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{ 12415 match(If cmp flags); 12416 effect(USE labl); 12417 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12418 expand %{ 12419 jmpCon(cmp,flags,labl); // JLT or JGE... 12420 %} 12421 %} 12422 12423 //====== 12424 // Manifest a CmpUL result in the normal flags. Only good for LT or GE 12425 // compares. Can be used for LE or GT compares by reversing arguments. 12426 // NOT GOOD FOR EQ/NE tests. 12427 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{ 12428 match(Set flags (CmpUL src zero)); 12429 ins_cost(100); 12430 format %{ "TEST $src.hi,$src.hi" %} 12431 opcode(0x85); 12432 ins_encode(OpcP, RegReg_Hi2(src, src)); 12433 ins_pipe(ialu_cr_reg_reg); 12434 %} 12435 12436 // Manifest a CmpUL result in the normal flags. Only good for LT or GE 12437 // compares. Can be used for LE or GT compares by reversing arguments. 12438 // NOT GOOD FOR EQ/NE tests. 12439 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{ 12440 match(Set flags (CmpUL src1 src2)); 12441 effect(TEMP tmp); 12442 ins_cost(300); 12443 format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t" 12444 "MOV $tmp,$src1.hi\n\t" 12445 "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %} 12446 ins_encode(long_cmp_flags2(src1, src2, tmp)); 12447 ins_pipe(ialu_cr_reg_reg); 12448 %} 12449 12450 // Unsigned long compares reg < zero/req OR reg >= zero/req. 12451 // Just a wrapper for a normal branch, plus the predicate test. 12452 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{ 12453 match(If cmp flags); 12454 effect(USE labl); 12455 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); 12456 expand %{ 12457 jmpCon(cmp, flags, labl); // JLT or JGE... 12458 %} 12459 %} 12460 12461 // Compare 2 longs and CMOVE longs. 
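//
// The CMOV forms below consume flags produced by the CMP/SBB recipes above.
// In rough terms (worked example only): "CMP src1.lo,src2.lo" leaves the
// low-word borrow in CF, and "MOV tmp,src1.hi; SBB tmp,src2.hi" computes the
// high word of the full 64-bit difference, so afterwards SF/OF answer the
// signed LT/GE question (and CF the unsigned below/above-or-equal one).
// ZF does not: for src1 = 0x00000001_00000000 and src2 = 0x00000000_FFFFFFFF
// the SBB result is zero, so ZF is set even though the operands differ,
// which is exactly why these flag classes are documented as NOT GOOD FOR
// EQ/NE tests.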
12462 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{ 12463 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 12464 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12465 ins_cost(400); 12466 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12467 "CMOV$cmp $dst.hi,$src.hi" %} 12468 opcode(0x0F,0x40); 12469 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 12470 ins_pipe( pipe_cmov_reg_long ); 12471 %} 12472 12473 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{ 12474 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 12475 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12476 ins_cost(500); 12477 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12478 "CMOV$cmp $dst.hi,$src.hi" %} 12479 opcode(0x0F,0x40); 12480 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 12481 ins_pipe( pipe_cmov_reg_long ); 12482 %} 12483 12484 // Compare 2 longs and CMOVE ints. 12485 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{ 12486 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12487 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 12488 ins_cost(200); 12489 format %{ "CMOV$cmp $dst,$src" %} 12490 opcode(0x0F,0x40); 12491 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12492 ins_pipe( pipe_cmov_reg ); 12493 %} 12494 12495 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{ 12496 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12497 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 12498 ins_cost(250); 12499 format %{ "CMOV$cmp $dst,$src" %} 12500 opcode(0x0F,0x40); 12501 ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); 12502 ins_pipe( pipe_cmov_mem ); 12503 %} 12504 12505 // Compare 2 longs and CMOVE ints. 
// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}
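// Illustration (not part of the ADL): the EQ/NE patterns above rely on two
// simple 32-bit decompositions of a 64-bit equality test.  A hedged C++
// sketch of both, with hypothetical helper names:
//
//   #include <cstdint>
//
//   // cmpL_zero_flags_EQNE: OR-ing the halves is zero iff the long is zero.
//   static bool long_is_zero(int64_t a) {
//     uint32_t lo = (uint32_t)a, hi = (uint32_t)((uint64_t)a >> 32);
//     return (lo | hi) == 0;                 // MOV tmp,lo; OR tmp,hi; test ZF
//   }
//
//   // cmpL_reg_flags_EQNE: equal iff both halves are equal; the emitted code
//   // short-circuits with "JNE,s skip" so the ZF left in the flags is the answer.
//   static bool long_eq(int64_t a, int64_t b) {
//     return (uint32_t)a == (uint32_t)b &&
//            (uint32_t)((uint64_t)a >> 32) == (uint32_t)((uint64_t)b >> 32);
//   }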
// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
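// Illustration (not part of the ADL): a minimal C++ sketch of why swapping
// the operands plus a commuted test yields LE/GT from the same LT/GE-only
// flag pattern: a > b is exactly b < a, and a <= b is exactly !(b < a).
// The zero forms compute 0 - src (XOR/CMP/SBB) and rely on the same
// commuted reading.  The helper name below is hypothetical.
//
//   #include <cstdint>
//   #include <cassert>
//
//   // Signed 64-bit "less" from 32-bit halves (same shape as the LTGE rules).
//   static bool lt64(int64_t a, int64_t b) {
//     int borrow = ((uint32_t)a < (uint32_t)b) ? 1 : 0;
//     int32_t ahi = (int32_t)(uint32_t)((uint64_t)a >> 32);
//     int32_t bhi = (int32_t)(uint32_t)((uint64_t)b >> 32);
//     return (int64_t)ahi - bhi - borrow < 0;
//   }
//
//   int main() {
//     int64_t a = 7, b = 1LL << 40;
//     assert( (a >  b) ==  lt64(b, a) );   // GT via swapped operands + "less"
//     assert( (a <= b) == !lt64(b, a) );   // LE via swapped operands + "greater/equal"
//     assert( (b >  a) ==  lt64(a, b) );
//     return 0;
//   }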
// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or. By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$. Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic. It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6);
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}
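// Illustration (not part of the ADL): the poll above is just a harmless read
// of a VM-owned "polling page"; to stop threads at a safepoint the VM
// protects that page so the next poll faults, and the fault handler parks
// the thread.  A hedged, stand-alone POSIX sketch of the page-arming half of
// that idea (names are hypothetical; HotSpot's real handling lives in the
// runtime, not here):
//
//   #include <sys/mman.h>
//   #include <cassert>
//
//   static void* poll_page = nullptr;
//
//   static void init_poll_page() {
//     poll_page = mmap(nullptr, 4096, PROT_READ,
//                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
//     assert(poll_page != MAP_FAILED);
//   }
//
//   // What the emitted "TEST reg,[polladdr]" amounts to: a read that is free
//   // while the page is readable and faults once the page has been armed.
//   static void poll() { (void)*(volatile int*)poll_page; }
//
//   static void arm_poll_page()    { mprotect(poll_page, 4096, PROT_NONE); }
//   static void disarm_poll_page() { mprotect(poll_page, 4096, PROT_READ); }
//
//   int main() { init_poll_page(); poll(); /* page not armed: no fault */ return 0; }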
// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.