1 //
2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// EBX, ESI, and EDI were previously set as save-on-entry for java code.
// SOE was turned off in java code due to frequent use of uncommon-traps.
// Now that the allocator is better, ESI and EDI are SOE registers again.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Note the trick here: FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed, making FPR1 == st(1) temporarily. However, at any safepoint
// the stack will not have this element, so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example.
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI)
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}
240
241
//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
// (presumably defined later in this file's source block — TODO confirm)
extern bool is_operand_hi32_zero(Node* n);
%}
249
source %{
// Relocation format selectors for 32-bit immediates vs. 32-bit displacements.
#define RELOC_IMM32 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand

// Shorthand used by the emission code throughout this file.
#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.
262
// Remove registers the register allocator must not hand out.
void reg_mask_init() {
  if (Matcher::has_predicated_vectors()) {
    // Post-loop multi-versioning expects the mask to be present in the K1
    // register. Until that is fixed, the RA should not be allocating the K1
    // register; this prevents any accidental corruption of the value held in K1.
    if (PostLoopMultiversioning) {
      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
    }
  }
}
274
275 // Note: 'double' and 'long long' have 32-bits alignment on x86.
276 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
277 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
278 // of 128-bits operands for SSE instructions.
279 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
280 // Store the value to a 128-bits operand.
281 operand[0] = lo;
282 operand[1] = hi;
283 return operand;
284 }
285
// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool entry is a 16-byte-aligned pair of identical 64-bit patterns:
//   signmask (0x7FFF...): AND clears the sign bit(s)  -> AbsF/AbsD
//   signflip (0x8000...): XOR flips the sign bit(s)   -> NegF/NegD
// The float variants carry the pattern in both 32-bit halves of each word.
static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
294
295 // Offset hacking within calls.
296 static int pre_call_resets_size() {
297 int size = 0;
298 Compile* C = Compile::current();
299 if (C->in_24_bit_fp_mode()) {
300 size += 6; // fldcw
301 }
302 if (VM_Version::supports_vzeroupper()) {
303 size += 3; // vzeroupper
304 }
305 return size;
306 }
307
// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.

// A direct call (0xE8 + rel32) is 5 bytes; any pre-call resets
// (fldcw/vzeroupper) emitted first add to the offset.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

// Dynamic calls are preceded by a 5-byte MOV (inline-cache load),
// so the return address lands 10 bytes past the sequence start.
int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Byte size of the FFree_Float_Stack_All call sequence; set when that
// code is first emitted (-1 until then — see the assert below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  // Leaf calls that touch no FP state skip freeing the x87 stack.
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

// Native calls are not emitted through this node on x86_32.
int MachCallNativeNode::ret_addr_offset() {
  ShouldNotCallThis();
  return -1;
}
330
//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  // Alignment applies to the displacement that follows any pre-call
  // resets (fldcw/vzeroupper) and the 1-byte call opcode.
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1; // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5; // skip MOV instruction (inline-cache load)
  current_offset += 1; // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}
351
// EMIT_RM()
// Emit a ModRM byte: mode (f1, 2 bits), reg/opcode-extension (f2, 3 bits),
// r/m (f3, 3 bits). Also used for SIB bytes (scale/index/base).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode formed by OR-ing a base opcode (f1) with a condition code (f2).
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
// Records a relocation at insts_mark()+offset, then emits the opcode byte.
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Debug builds verify that an embedded oop immediate is a real oop
  // (0 and the non-oop sentinel are allowed placeholders).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}
408
// Access stack slot for load or store.
// Emits opcode + ModRM/SIB for an [ESP+disp] operand, using the short
// 8-bit displacement form when disp fits in a signed byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte (mode 1 = disp8)
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);   // SIB byte (base ESP needs one)
    emit_d8 (cbuf, disp);     // Displacement
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte (mode 2 = disp32)
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);   // SIB byte
    emit_d32(cbuf, disp);     // Displacement
  }
}
422
// rRegI ereg, memory mem) %{ // emit_reg_mem
// Emit the ModRM byte (and SIB byte when required) plus displacement for a
// [base + index*scale + displace] memory operand. Chooses the shortest legal
// encoding (no disp / disp8 / disp32) and handles the x86 special cases:
// index 0x4 means "no index", mode-0 with r/m 0x5 means absolute address,
// and base ESP (or any indexed form) forces a SIB byte.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else { // If 8-bit displacement, mode 0x1
      // disp8 only usable when there is no relocation on the displacement.
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else { // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else { // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) { // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}
489
490
491 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
492 if( dst_encoding == src_encoding ) {
493 // reg-reg copy, use an empty encoding
494 } else {
495 emit_opcode( cbuf, 0x8B );
496 emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
497 }
498 }
499
// After a comiss/ucomiss compare, rewrite the flags so that an unordered
// (NaN) result reads as 'less than'; ordered results are left untouched.
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);  // PF clear -> ordered, nothing to fix
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}
519
// Materialize a three-way FP compare result into dst:
// -1 if less-than or unordered (NaN), 0 if equal, 1 if greater.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);                   // default: less / unordered
  __ jcc(Assembler::parity, done);    // PF set -> unordered, keep -1
  __ jcc(Assembler::below, done);     // CF set -> less, keep -1
  __ setb(Assembler::notEqual, dst);  // 1 if not equal, 0 if equal
  __ movzbl(dst, dst);                // zero-extend the byte result
  __ bind(done);
}
529
530
//=============================================================================
// The constant table is addressed absolutely on x86_32, so the base node
// needs no output register and emits no code.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0; // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  // Never reached: requires_postalloc_expand() returns false above.
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif
556
557
//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog; must mirror what emit() (verified_entry) produces.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    // PUSH EBP accounts for one more word of the frame.
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    // No stack bang: frame is created first, then EBP is stored into it.
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif
621
622
// Emit the method prolog. The whole sequence (stack bang, PUSH EBP,
// frame setup, optional 24-bit FPU control word load) is delegated to
// MacroAssembler::verified_entry().
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}
649
//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog; must mirror what emit() below produces.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL rsp, poll_offset[thread] \n\t"
              "JA #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif
679
680 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
681 Compile *C = ra_->C;
682 MacroAssembler _masm(&cbuf);
683
684 if (C->max_vector_size() > 16) {
685 // Clear upper bits of YMM registers when current compiled code uses
686 // wide vectors to avoid AVX <-> SSE transition penalty during call.
687 _masm.vzeroupper();
688 }
689 // If method set FPU control word, restore to standard control word
690 if (C->in_24_bit_fp_mode()) {
691 _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
692 }
693
694 int framesize = C->output()->frame_size_in_bytes();
695 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
696 // Remove two words for return addr and rbp,
697 framesize -= 2*wordSize;
698
699 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
700
701 if (framesize >= 128) {
702 emit_opcode(cbuf, 0x81); // add SP, #framesize
703 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
704 emit_d32(cbuf, framesize);
705 } else if (framesize) {
706 emit_opcode(cbuf, 0x83); // add SP, #framesize
707 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
708 emit_d8(cbuf, framesize);
709 }
710
711 emit_opcode(cbuf, 0x58 | EBP_enc);
712
713 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
714 __ reserved_stack_check();
715 }
716
717 if (do_polling() && C->is_method_compilation()) {
718 Register thread = as_Register(EBX_enc);
719 MacroAssembler masm(&cbuf);
720 __ get_thread(thread);
721 Label dummy_label;
722 Label* code_stub = &dummy_label;
723 if (!C->output()->in_scratch_emit_size()) {
724 code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
725 }
726 __ relocate(relocInfo::poll_return_type);
727 __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
728 }
729 }
730
// Epilog size varies (frame size, polling, FP mode); let the generic
// scratch-emit machinery measure it.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
743
//=============================================================================

// Coarse register classes used by the spill-copy code below.
enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
// Classify an allocator register name into one of the RC classes above.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    // x87 registers are only allocated when SSE2 is unavailable.
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  if (r->is_KRegister()) return rc_kreg;
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}
762
// Spill helper for GPR/x87 registers: load/store between a stack slot at
// [ESP+offset] and register 'reg'. With a CodeBuffer it emits the
// instruction; otherwise (unless do_size) it prints the assembly text.
// Returns 'size' plus the byte length of the emitted instruction.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  // opcode + ModRM + SIB = 3 bytes, plus 0/1/4 displacement bytes.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}
782
// Helper for XMM registers. Extra opcode bits, limited syntax.
// Spill copy between a stack slot and an XMM register. An adjacent
// (reg_lo, reg_hi) pair denotes a 64-bit double; otherwise a 32-bit float.
// Returns 'size' plus the (EVEX/VEX-aware) byte length of the instruction.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    // Adjacent pair: operate on a 64-bit (double) quantity.
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    // EVEX compressed displacement may let a larger offset fit in one byte.
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}
841
842
// XMM-to-XMM register copy (float or double).  A double move is recognized
// by both source and destination being adjacent register pairs
// (src_lo+1 == src_hi && dst_lo+1 == dst_hi).
// The printed mnemonic differs from the emitted one when
// UseXmmRegToRegMoveAll is off (MOVSD/MOVSS vs MOVAPD/MOVAPS); the
// assembler-level movdbl/movflt choose the actual instruction.
// Returns the accumulated instruction size in bytes.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}
881
882 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
883 int src_hi, int dst_hi, int size, outputStream* st ) {
884 // 32-bit
885 if (cbuf) {
886 MacroAssembler _masm(cbuf);
887 // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
888 _masm.set_managed();
889 __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
890 as_Register(Matcher::_regEncode[src_lo]));
891 #ifndef PRODUCT
892 } else if (!do_size) {
893 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
894 #endif
895 }
896 return (UseAVX> 2) ? 6 : 4;
897 }
898
899
900 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
901 int src_hi, int dst_hi, int size, outputStream* st ) {
902 // 32-bit
903 if (cbuf) {
904 MacroAssembler _masm(cbuf);
905 // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
906 _masm.set_managed();
907 __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
908 as_XMMRegister(Matcher::_regEncode[src_lo]));
909 #ifndef PRODUCT
910 } else if (!do_size) {
911 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
912 #endif
913 }
914 return (UseAVX> 2) ? 6 : 4;
915 }
916
// Integer register-to-register move: MOV r32,r/m32 (opcode 0x8B) with a
// mod=11 ModRM byte.  Emits 2 bytes; returns the accumulated size.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}
929
// Store an x87 floating-point register to the stack slot [ESP + offset].
// If the source is not already on top of the FP stack (FPR1L), it is first
// pushed with FLD ST(i) and later popped by using the FSTP form of the
// store; otherwise a plain FST leaves the stack unchanged.
// Returns the accumulated instruction size in bytes.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD    %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // Select store-and-pop (FSTP) vs plain store (FST).  The reg field of
  // the eventual ModRM byte is smuggled through impl_helper via a fake
  // register number: EBX_num encodes /3 (FSTP), EDX_num encodes /2 (FST).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;                     // 64-bit memory-operand opcode
  } else {                         // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;                     // 32-bit memory-operand opcode
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}
959
960 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
961 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
962 int src_hi, int dst_hi, uint ireg, outputStream* st);
963
964 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
965 int stack_offset, int reg, uint ireg, outputStream* st);
966
967 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
968 int dst_offset, uint ireg, outputStream* st) {
969 if (cbuf) {
970 MacroAssembler _masm(cbuf);
971 switch (ireg) {
972 case Op_VecS:
973 __ pushl(Address(rsp, src_offset));
974 __ popl (Address(rsp, dst_offset));
975 break;
976 case Op_VecD:
977 __ pushl(Address(rsp, src_offset));
978 __ popl (Address(rsp, dst_offset));
979 __ pushl(Address(rsp, src_offset+4));
980 __ popl (Address(rsp, dst_offset+4));
981 break;
982 case Op_VecX:
983 __ movdqu(Address(rsp, -16), xmm0);
984 __ movdqu(xmm0, Address(rsp, src_offset));
985 __ movdqu(Address(rsp, dst_offset), xmm0);
986 __ movdqu(xmm0, Address(rsp, -16));
987 break;
988 case Op_VecY:
989 __ vmovdqu(Address(rsp, -32), xmm0);
990 __ vmovdqu(xmm0, Address(rsp, src_offset));
991 __ vmovdqu(Address(rsp, dst_offset), xmm0);
992 __ vmovdqu(xmm0, Address(rsp, -32));
993 break;
994 case Op_VecZ:
995 __ evmovdquq(Address(rsp, -64), xmm0, 2);
996 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
997 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
998 __ evmovdquq(xmm0, Address(rsp, -64), 2);
999 break;
1000 default:
1001 ShouldNotReachHere();
1002 }
1003 #ifndef PRODUCT
1004 } else {
1005 switch (ireg) {
1006 case Op_VecS:
1007 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1008 "popl [rsp + #%d]",
1009 src_offset, dst_offset);
1010 break;
1011 case Op_VecD:
1012 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1013 "popq [rsp + #%d]\n\t"
1014 "pushl [rsp + #%d]\n\t"
1015 "popq [rsp + #%d]",
1016 src_offset, dst_offset, src_offset+4, dst_offset+4);
1017 break;
1018 case Op_VecX:
1019 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1020 "movdqu xmm0, [rsp + #%d]\n\t"
1021 "movdqu [rsp + #%d], xmm0\n\t"
1022 "movdqu xmm0, [rsp - #16]",
1023 src_offset, dst_offset);
1024 break;
1025 case Op_VecY:
1026 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1027 "vmovdqu xmm0, [rsp + #%d]\n\t"
1028 "vmovdqu [rsp + #%d], xmm0\n\t"
1029 "vmovdqu xmm0, [rsp - #32]",
1030 src_offset, dst_offset);
1031 break;
1032 case Op_VecZ:
1033 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1034 "vmovdqu xmm0, [rsp + #%d]\n\t"
1035 "vmovdqu [rsp + #%d], xmm0\n\t"
1036 "vmovdqu xmm0, [rsp - #64]",
1037 src_offset, dst_offset);
1038 break;
1039 default:
1040 ShouldNotReachHere();
1041 }
1042 #endif
1043 }
1044 }
1045
// Central spill-copy generator.  Dispatches on the register classes of the
// source and destination (int GPR, x87 float, XMM, opmask kreg, stack) and
// forwards to the impl_* helpers above.  Depending on its arguments it
// emits code (cbuf != NULL), prints assembly (cbuf == NULL, !do_size), or
// just computes the encoding size; the return value is the size in bytes
// (0 for the vector and kreg paths, which do not participate in the
// size-accumulation scheme).
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // Vector spill copies (but not vector-mask copies) are handled entirely
  // by the shared vec_* helpers and return a size of 0.
  if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    // If dst_first overlaps src_second, move the second word first so it
    // is not clobbered by the low-word push/pop.
    if( src_second == dst_first ) { // overlapping stack copy ranges
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( src_first_rc == rc_stack && dst_first_rc == rc_int )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
     } else {
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
     }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP-8]");
#endif
    }
    size += 4;

    // Store the x87 value into the scratch slot just created...
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    // ...then release the scratch slot, again via LEA to preserve flags.
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // AVX-512 opmask specific spilling.
  if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    int offset = ra_->reg2offset(src_first);
    __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    int offset = ra_->reg2offset(dst_first);
    __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
    return 0;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  Unimplemented();
  return 0; // Mute compiler
}
1313
#ifndef PRODUCT
// Debug-only: print the spill copy by running implementation() in its
// formatting mode (no CodeBuffer).
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif
1319
// Emit the spill-copy machine code into cbuf via implementation().
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}
1323
// Size of the spill copy; defers to the generic MachNode sizing.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1327
1328
1329 //=============================================================================
#ifndef PRODUCT
// Debug-only printout matching BoxLockNode::emit: LEA reg,[ESP + offset].
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif
1337
1338 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1339 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1340 int reg = ra_->get_encode(this);
1341 if( offset >= 128 ) {
1342 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
1343 emit_rm(cbuf, 0x2, reg, 0x04);
1344 emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1345 emit_d32(cbuf, offset);
1346 }
1347 else {
1348 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
1349 emit_rm(cbuf, 0x1, reg, 0x04);
1350 emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1351 emit_d8(cbuf, offset);
1352 }
1353 }
1354
1355 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1356 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1357 if( offset >= 128 ) {
1358 return 7;
1359 }
1360 else {
1361 return 4;
1362 }
1363 }
1364
1365 //=============================================================================
#ifndef PRODUCT
// Debug-only printout of the unverified entry point (inline cache check);
// mirrors MachUEPNode::emit, including the padding NOPs.
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP    EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif
1376
// Emit the unverified entry point: compare the receiver's klass (in ECX)
// against the inline-cache klass (in EAX) and jump to the IC miss stub on
// mismatch, then pad with NOPs.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  // Emitted size must agree exactly with MachUEPNode::size().
  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}
1394
1395 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1396 return OptoBreakpoint ? 11 : 12;
1397 }
1398
1399
1400 //=============================================================================
1401
// Vector calling convention not supported on this 32-bit port.
const bool Matcher::supports_vector_calling_convention() {
  return false;
}
1406
// Unreachable while supports_vector_calling_convention() returns false.
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  Unimplemented();
  return OptoRegPair(0, 0);
}
1411
1412 // Is this branch offset short enough that a short branch can be used?
1413 //
1414 // NOTE: If the platform does not provide any short branch variants, then
1415 // this method should return false for offset 0.
1416 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1417 // The passed offset is relative to address of the branch.
1418 // On 86 a branch displacement is calculated relative to address
1419 // of a next instruction.
1420 offset -= br_size;
1421
1422 // the short version of jmpConUCF2 contains multiple branches,
1423 // making the reach slightly less
1424 if (rule == jmpConUCF2_rule)
1425 return (-126 <= offset && offset <= 125);
1426 return (-128 <= offset && offset <= 127);
1427 }
1428
1429 // Return whether or not this register is ever used as an argument. This
1430 // function is used on startup to build the trampoline stubs in generateOptoStub.
1431 // Registers not mentioned will be killed by the VM call in the trampoline, and
1432 // arguments in those registers not be available to the callee.
1433 bool Matcher::can_be_java_arg( int reg ) {
1434 if( reg == ECX_num || reg == EDX_num ) return true;
1435 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
1436 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1437 return false;
1438 }
1439
// A register is spillable as an argument exactly when it can carry one.
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}
1443
1444 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1445 // Use hardware integer DIV instruction when
1446 // it is faster than a code which use multiply.
1447 // Only when constant divisor fits into 32 bit
1448 // (min_jint is excluded to get only correct
1449 // positive 32 bit values from negative).
1450 return VM_Version::has_fast_idiv() &&
1451 (divisor == (int)divisor && divisor != min_jint);
1452 }
1453
// Register for DIVI projection of divmodI: x86 IDIV leaves the quotient in EAX.
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI: x86 IDIV leaves the remainder in EDX.
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL: no 64-bit divmod node on this
// 32-bit port.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL: no 64-bit divmod node on this
// 32-bit port.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// No register is reserved for saving SP around method-handle invokes here.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}
1479
1480 // Returns true if the high 32 bits of the value is known to be zero.
1481 bool is_operand_hi32_zero(Node* n) {
1482 int opc = n->Opcode();
1483 if (opc == Op_AndL) {
1484 Node* o2 = n->in(2);
1485 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1486 return true;
1487 }
1488 }
1489 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1490 return true;
1491 }
1492 return false;
1493 }
1494
1495 %}
1496
1497 //----------ENCODING BLOCK-----------------------------------------------------
1498 // This block specifies the encoding classes used by the compiler to output
1499 // byte streams. Encoding classes generate functions which are called by
1500 // Machine Instruction Nodes in order to generate the bit encoding of the
1501 // instruction. Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.  REG_INTER causes an
1504 // operand to generate a function which returns its register number when
1505 // queried. CONST_INTER causes an operand to generate a function which
1506 // returns the value of the constant when queried. MEMORY_INTER causes an
1507 // operand to generate four functions which return the Base Register, the
1508 // Index Register, the Scale Value, and the Offset Value of the operand when
1509 // queried. COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
1511 // associated with each basic boolean condition for a conditional instruction.
1512 // Instructions specify two basic values for encoding. They use the
1513 // ins_encode keyword to specify their encoding class (which must be one of
1514 // the class names specified in the encoding block), and they use the
1515 // opcode keyword to specify, in order, their primary, secondary, and
1516 // tertiary opcode. Only the opcode sections which a particular instruction
1517 // needs for encoding need to be specified.
1518 encode %{
1519 // Build emit functions for each basic byte or larger field in the intel
1520 // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1521 // code in the enc_class source block. Emit functions will live in the
1522 // main source block for now. In future, we can generalize this by
1523 // adding a syntax that specifies the sizes of fields in an order,
1524 // so that the adlc can build the emit functions automagically
1525
  // Emit primary opcode (the first opcode listed in the instruct's
  // "opcode" declaration).
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode.
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit an opcode given directly as an 8-bit immediate operand.
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Operand-size override prefix (0x66) for 16-bit operations.
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // ModRM byte for a reg,reg form (mod = 11).
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Explicit opcode followed by a reg,reg ModRM byte.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32,0 via the short 0xB8+rd immediate form.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
    emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
  %}
1558
  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : eax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: eax: quotient  (= eax idiv reg)       min_int
    //         edx: remainder (= eax irem reg)       0
    //
    // Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         eax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        eax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp eax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}
1599
  // Dense encoding for older common ops: single-byte opcode with the
  // register number folded into the low bits (e.g. 0x50+rd for PUSH).
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}
1604
1605
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension.
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  // As OpcSE, plus the reg ModRM byte carrying the secondary opcode in the
  // reg field (the /digit of the instruction).
  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  // Emit the immediate itself in the width chosen by OpcSE/OpcSErm above.
  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}
1639
  // ALU op with immediate on the LOW half of a long register pair:
  // opcode (8-bit-immediate form when the low 32 bits fit in a signed
  // byte), mod/rm with the secondary opcode extension, then the immediate.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}
1650
  // ALU op with immediate on the HIGH half of a long register pair.
  // Same shape as Long_OpcSErm_Lo but uses the tertiary opcode extension
  // (e.g. ADD lo / ADC hi) and the high register of the pair.
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}
1661
  // Emits the secondary opcode with the register encoded into its low
  // bits (used for BSWAP; any required prefix byte presumably comes from
  // the instruct's other encodings — confirm at the use sites).
  enc_class OpcSReg (rRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}
1665
  // Byte-swaps a 64-bit value held in a 32-bit register pair: BSWAP each
  // half, then XCHG the halves so the overall 8-byte order is reversed.
  enc_class bswap_long_bytes(eRegL dst) %{    // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}
1679
  // Register-direct mod/rm byte with the secondary opcode in the
  // reg/opcode field (the x86 "/digit" opcode-extension form).
  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}
1683
  // CMOVcc: primary opcode byte followed by the secondary opcode with
  // the condition code folded into its low bits.
  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}
1688
  // x87 FCMOVcc: build the two-byte opcode in the 0xDA family from the
  // condition code plus the source FP-stack slot, then emit high byte
  // first, low byte second.
  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}
1694
1695 // emulate a CMOV with a conditional branch around a MOV
  // emulate a CMOV with a conditional branch around a MOV
  // NOTE: brOffs must equal the byte size of the skipped MOV, which is
  // emitted by the instruct using this encoding.
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}
1701
  // Slow-path subtype check: ESI = sub class, EAX = super class,
  // EDI = result, ECX killed.  When $primary is set the result register
  // is zeroed on a miss (result-producing form); otherwise only the
  // condition codes are meaningful.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}
1718
  // Empty the x87 stack before calling out (C calling convention expects
  // a clean FPU stack).  In SSE2+ mode the stack should already be empty,
  // so only (optionally) verify.  The emitted size is recorded once and
  // asserted equal thereafter because callers rely on a fixed size.
  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}
1737
  // Debug-only check of FPU stack depth after returning from a runtime
  // leaf call; emits nothing unless -XX:+VerifyFPU.
  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}
1744
  // Direct CALL into the runtime (pc-relative rel32, with relocation).
  // In SSE2+ mode a C float/double result comes back on the x87 stack;
  // move it to xmm0 through a stack slot — or just pop it if the result
  // is unused — so the FPU stack stays clean.
  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                   runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp,  4));
      } else if (rt == T_DOUBLE) {
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp,  8));
      }
    }
  %}
1776
  // Work done immediately before a call: restore the standard FPU control
  // word if this method runs in 24-bit precision mode, and vzeroupper to
  // avoid the AVX<->SSE transition penalty.  Size is asserted against
  // pre_call_resets_size(), which callers use for layout.
  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.insts_size());
    if (ra_->C->in_24_bit_fp_mode()) {
      MacroAssembler _masm(&cbuf);
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    MacroAssembler _masm(&cbuf);
    __ vzeroupper();
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}
1791
  // Counterpart of pre_call_resets: after the call, reinstall the 24-bit
  // FPU control word if this method runs in 24-bit precision mode.
  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    }
  %}
1799
  // Static Java call: pc-relative CALL with the relocation type chosen by
  // whether a resolved method is attached (runtime stub vs static /
  // opt-virtual call site), plus a to-interpreter stub for resolved calls.
  // Bails out (recording failure) if the code cache is full.
  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;

    if (!_method) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_IMM32);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     rspec, RELOC_DISP32);
      // Emit stubs for static call.
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
  %}
1824
  // Virtual (inline-cache) Java call; the MacroAssembler emits the IC
  // pattern and relocation.
  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
  %}
1829
  // Indirect call through the Method* in EAX: CALL *[EAX+disp8], where
  // disp8 is the from-compiled entry-point offset inside Method.
  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);             // Displacement

  %}
1841
1842 // Following encoding is no longer used, but may be restored if calling
1843 // convention changes significantly.
1844 // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1845 //
1846 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL
1847 // // int ic_reg = Matcher::inline_cache_reg();
1848 // // int ic_encode = Matcher::_regEncode[ic_reg];
1849 // // int imo_reg = Matcher::interpreter_method_reg();
1850 // // int imo_encode = Matcher::_regEncode[imo_reg];
1851 //
1852 // // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
1853 // // // so we load it immediately before the call
1854 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_ptr
1855 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1856 //
1857 // // xor rbp,ebp
1858 // emit_opcode(cbuf, 0x33);
1859 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1860 //
1861 // // CALL to interpreter.
1862 // cbuf.set_insts_mark();
1863 // $$$emit8$primary;
1864 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1865 // runtime_call_Relocation::spec(), RELOC_IMM32 );
1866 // %}
1867
  // Shift register by an 8-bit immediate: primary opcode, mod/rm with the
  // secondary opcode extension selecting SHL/SAR/SHR, then the count byte.
  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}
1873
  // MOV reg,imm32 (0xB8+reg).
  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}
1880
  // MOV reg,imm32 using the instruct's primary opcode (pointer variant;
  // relocation, if any, is handled by the instruct).
  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}
1887
1888 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate
1889 // Load immediate does not have a zero or sign extended version
1890 // for 8-bit immediates
1891 int dst_enc = $dst$$reg;
1892 int src_con = $src$$constant & 0x0FFFFFFFFL;
1893 if (src_con == 0) {
1894 // xor dst, dst
1895 emit_opcode(cbuf, 0x33);
1896 emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1897 } else {
1898 emit_opcode(cbuf, $primary + dst_enc);
1899 emit_d32(cbuf, src_con);
1900 }
1901 %}
1902
  // Materialize the high 32 bits of a long constant into the high half of
  // the pair (encoded at +2 from the low half, cf. HIGH_FROM_LOW).  A
  // zero half uses the shorter XOR reg,reg idiom.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}
1917
1918
1919 // Encode a reg-reg copy. If it is useless, then empty encoding.
  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}
1923
  // Copy only the low half of a long pair into an int register (elided
  // when src and dst coincide, as in enc_Copy).
  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}
1927
  // Register-direct mod/rm byte only; the opcode comes from the instruct.
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}
1931
  // Primary opcode plus mod/rm for the LOW halves of two long pairs.
  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}
1936
  // Secondary opcode plus mod/rm for the HIGH halves of two long pairs
  // (e.g. the ADC of an ADD/ADC sequence).
  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}
1941
  // mod/rm only for the LOW halves (opcode emitted by the instruct).
  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}
1945
  // mod/rm only for the HIGH halves (opcode emitted by the instruct).
  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}
1949
  // mod/rm pairing an int register (dst) with the HIGH half of a long
  // pair (src).
  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}
1953
  // Emit a 32-bit immediate operand.
  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}
1958
  // Emit a float constant as its raw 32-bit IEEE-754 pattern (x87 store).
  enc_class Con32FPR_as_bits(immFPR src) %{    // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}
1965
  // Emit a float constant as its raw 32-bit IEEE-754 pattern (XMM store).
  enc_class Con32F_as_bits(immF src) %{    // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}
1972
  // Emit a 16-bit immediate operand.
  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}
1977
  // Emit a raw 32-bit datum (no relocation).
  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}
1981
  // mod/rm for a [disp32]-only memory operand (mod=00, base=101) with a
  // zero displacement placeholder.
  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}
1987
  // Emit the x86 LOCK prefix (0xF0) for the following instruction.
  enc_class lock_prefix( ) %{
    emit_opcode(cbuf,0xF0);         // [Lock]
  %}
1991
1992 // Cmp-xchg long value.
1993 // Note: we need to swap rbx, and rcx before and after the
1994 // cmpxchg8 instruction because the instruction uses
1995 // rcx as the high order word of the new value to store but
1996 // our register encoding uses rbx,.
  // LOCK CMPXCHG8B [ESI], bracketed by XCHG EBX,ECX because the
  // instruction wants the new-value high word in ECX while our register
  // encoding keeps it in EBX (see comment above this enc_class).
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}
2012
  // LOCK CMPXCHG [ESI] (32-bit compare-and-swap).
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}
2022
  // LOCK CMPXCHG byte variant (opcode 0x0F 0xB0).
  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHGB [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB0);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}
2032
  // LOCK CMPXCHG word variant: 0x66 operand-size prefix selects the
  // 16-bit form of 0x0F 0xB1.
  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // 16-bit mode
    emit_opcode(cbuf, 0x66);

    // CMPXCHGW [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}
2045
  // Turn the ZF flag into a 0/1 boolean without disturbing other flags:
  // preload 0, jump over the MOV of 1 when ZF is clear.  The branch
  // offset 5 is exactly the size of MOV reg,imm32.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}
2060
  // Record the current position as the instruction start so relocations
  // in a following memory operand resolve correctly.
  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}
2064
  // Standard reg,mem addressing: emit mod/rm (+SIB/displacement) for the
  // register and the decomposed memory operand, honoring any relocation
  // carried by the displacement.
  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}
2074
  // Like RegMem, but addresses the HIGH word of a long: uses the high
  // register of the pair and a displacement 4 bytes further.  Cannot
  // carry a relocation, since the +4 would corrupt an oop displacement.
  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}
2084
  // Long shift by a 1..31 constant: a double-shift (SHLD/SHRD, selected
  // by $tertiary; 0xA4 = SHLD, so operand order flips for right shifts)
  // moves bits across the halves, then the remaining half is shifted by
  // the same count with the primary/secondary opcode pair.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}
2097
  // Arithmetic right shift of a long by 32..63: move the high half into
  // the low half, shift it by (cnt-32) when nonzero, then SAR the high
  // half by 31 to replicate the sign bit.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}
2110
  // Logical long shift by 32..63 ($secondary picks direction): copy one
  // half over the other, shift it by (cnt-32) when nonzero, then clear
  // the vacated half with XOR.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}
2126
2127 // Clone of RegMem but accepts an extra parameter to access each
2128 // half of a double in memory; it never needs relocation info.
  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  // disp_for_half is 0 or 4 to select the low or high 32-bit half.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}
2139
2140 // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2141 //
2142 // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2143 // and it never needs relocation information.
2144 // Frequently used to move data between FPU's Stack Top and memory.
  // Memory operand whose mod/rm reg field is an ADLC-time opcode
  // extension ("/digit"); asserts no relocation is attached.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}
2154
  // Same as RMopc_Mem_no_oop, but the displacement may carry relocation
  // info (disp-as-oop for static globals).
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}
2164
  // LEA-style operand: dst = [src0 + src1], encoded as base register plus
  // constant displacement with no index (0x04) and no scale.
  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}
2174
  // Signed MIN: compare, and skip the MOV (2 bytes) when dst < src
  // already holds.
  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}
2186
  // Signed MAX: compare, and skip the MOV (2 bytes) when dst > src
  // already holds.
  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}
2198
  // Store an x87 register to memory.  If src is already ST(0) a plain
  // FST suffices; otherwise FLD pushes src to the top first and the
  // store-and-pop form (/3) restores the stack depth.
  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}
2217
  // Two's-complement negate: NEG r/m32 (0xF7 /3).
  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}
2223
  // SETL r/m8 (0x0F 0x9C): materialize the "less than" flag into the
  // low byte of dst.  The operand class restricts dst to a register
  // with a byte-addressable encoding.
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}
2230
  // Branchless conditional add (cadd_cmpLT): p -= q; then SBB builds an
  // all-ones/all-zeros mask from the borrow, which gates y before it is
  // added back into p.  Net effect: p = (p < q) ? p - q + y : p - q.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}
2247
  // Variable left shift of a long in a register pair by CL.  SHLD/SHL
  // only use the count mod 32, so for counts >= 32 the halves are first
  // swapped (lo -> hi, lo cleared); the 4-byte branch skips exactly the
  // MOV+XOR pair.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}
2271
  // Variable logical right shift of a long by CL.  Mirror image of
  // shift_left_long: for counts >= 32 move hi -> lo and clear hi, then
  // SHRD/SHR handle the count mod 32.
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}
2295
  // Variable arithmetic right shift of a long by CL.  For counts >= 32
  // move hi -> lo and sign-fill hi with SAR hi,31 (5-byte skip covers the
  // MOV+SAR pair); then SHRD/SAR handle the count mod 32.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}
2320
2321
2322 // ----------------- Encodings for floating point unit -----------------
2323 // May leave result in FPU-TOS or FPU reg depending on opcodes
  // x87 arithmetic on an FP-stack register: primary opcode byte plus a
  // register-direct mod/rm with the secondary opcode extension.
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}
2328
2329 // Pop argument in FPR0 with FSTP ST(0)
  // Pop argument in FPR0 with FSTP ST(0): DD D8.
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}
2334
2335 // !!!!! equivalent to Pop_Reg_F
  // FSTP ST(i): store top-of-stack into dst and pop.
  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}
2340
  // FLD ST(i-1): push a copy of dst onto the FP stack.
  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}
2345
  // Strictfp scaling step 1: multiply dst by the first subnormal-bias
  // constant (loaded as an 80-bit real from a stub-routine address) via
  // FLD m80real; FMULP ST(dst),ST0.
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}
2353
  // Strictfp scaling step 2: multiply dst by the second subnormal-bias
  // constant, undoing the scaling applied by strictfp_bias1.
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}
2361
2362 // Special case for moving an integer register to a stack slot.
  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{    // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}
2366
2367 // Special case for moving a register to a stack slot.
  // Register to [ESP+disp32]: mod/rm (mod=10, ESP base) + SIB + disp32.
  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{    // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);   // Displacement
  %}
2374
2375 // Push the integer in stackSlot 'src' onto FP-stack
  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}
2379
2380 // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  // Push FPU's TOS float to a stack-slot, and pop FPU-stack (FSTP_S = D9 /3).
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{    // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}
2384
2385 // Same as Pop_Mem_F except for opcode
2386 // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  // Same as Pop_Mem_F except for opcode (FSTP_D = DD /3).
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack.
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{    // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}
2390
  // FSTP ST(i): store top-of-stack into dst and pop (float variant).
  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}
2395
  // FLD ST(i-1): push a copy of dst onto the FP stack (float variant).
  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}
2400
2401 // Push FPU's float to a stack-slot, and pop FPU-stack
  // Push FPU's float to a stack-slot, and pop FPU-stack.  If src is not
  // already ST(0), push a copy first and use the store-and-pop form (/3)
  // so the stack depth is unchanged.
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg ); // FLD    ST(i-1)
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}
2411
2412 // Push FPU's double to a stack-slot, and pop FPU-stack
  // Push FPU's double to a stack-slot, and pop FPU-stack.  Same scheme
  // as Pop_Mem_Reg_FPR but with the double-precision store opcode 0xDD.
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg ); // FLD    ST(i-1)
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}
2422
2423 // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack.
  // FST vs FSTP is selected by the second opcode byte base (0xD0 store,
  // 0xD8 store-and-pop); the -1 compensates for the skipped FLD.
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}
2434
2435
  // Push dst to ST(0) for a modifying op.  If src is not FPR1, rotate it
  // into position with the fincstp / FXCH / fdecstp trick so the stack
  // top pointer itself is left unchanged.
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}
2453
  // Move two XMM doubles onto the x87 stack (src1 first, then src0 on
  // top) through an 8-byte scratch slot carved out of the stack.
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}
2462
  // Float counterpart of Push_ModD_encoding: two XMM floats pushed onto
  // the x87 stack through a 4-byte scratch slot.
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}
2471
  // Move the x87 double result from ST(0) into an XMM register and
  // release the 8-byte scratch slot (pairs with Push_ModD_encoding).
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}
2478
  // Move the x87 float result from ST(0) into an XMM register; d8 is the
  // number of scratch bytes to release.
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}
2485
  // Push a single XMM double onto the x87 stack through a fresh 8-byte
  // scratch slot (slot stays allocated; see pop_stack_temp_qword).
  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}
2492
  // Reserve an 8-byte scratch slot on the stack.
  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}
2497
  // Release the 8-byte scratch slot reserved by push_stack_temp_qword.
  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}
2502
  // Copy an XMM double onto the x87 stack via an existing scratch slot
  // at [rsp] (caller must have reserved it, e.g. push_stack_temp_qword).
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}
2508
  // After a modifying op, rotate the result register back out of the way
  // using the fincstp / FXCH / fdecstp trick (no-op when src is FPR1).
  // The actual store is done by a following Pop_Reg_F / Pop_Mem_F.
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}
2526
  // Copy the FPU status word into EFLAGS (FNSTSW AX; SAHF) and branch
  // over the following 5 bytes when the comparison was ordered (PF
  // clear); the skipped code is the unordered (NaN) fixup.
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}
2537
  // x87 FPREM loop: FPREM only reduces the exponent partially, so repeat
  // (back-branch JP rel32 = -12) until the C2 status bit — mirrored into
  // PF by SAHF — reports the reduction is complete.
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}
2559
  // Translate FPU compare status into integer flags: read the status
  // word, test the C2 (unordered) bit, and on an unordered result force
  // the LT case by setting AH=1 before SAHF.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}
2582
  // After a P6 FCOMI-style compare: if the result was unordered (PF set
  // by NaN), force the LT case via SAHF; the trailing NOP keeps the
  // branch target from landing on another branch.
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}
2597
2598 // fnstsw_ax();
2599 // sahf();
2600 // movl(dst, nan_result);
2601 // jcc(Assembler::parity, exit);
2602 // movl(dst, less_result);
2603 // jcc(Assembler::below, exit);
2604 // movl(dst, equal_result);
2605 // jcc(Assembler::equal, exit);
2606 // movl(dst, greater_result);
2607
// Result values actually emitted by CmpF_Result below:
//   less_result    = -1
//   greater_result =  1
//   equal_result   =  0
//   nan_result     = -1
// NOTE(review): an earlier version of this comment listed less=1 /
// greater=-1, which matches a swapped-operand compare rather than the
// encoding below -- confirm against the instruct rules that use it.
2612
  // Materialize a three-way FP compare result into an integer register:
  // dst = -1 (less or unordered/NaN), 0 (equal), +1 (greater).
  // The branch displacements are byte-exact: each MOV r32,imm32 is 5 bytes
  // and each short Jcc is 2 bytes.
  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit); -- 0x13 = 19 bytes = 5+2+5+2+5 below
    emit_opcode( cbuf, 0x7A );
    emit_d8 ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit); -- 0x0C = 12 bytes = 5+2+5 below
    emit_opcode( cbuf, 0x72 );
    emit_d8 ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit); -- 0x05 = the final 5-byte mov
    emit_opcode( cbuf, 0x74 );
    emit_d8 ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}
2641
2642
2643 // Compare the longs and set flags
2644 // BROKEN! Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // NOTE(review): marked BROKEN above. Comparing the high words and
    // conditionally comparing the low words leaves flags that are only
    // meaningful for equality; signed/unsigned ordering of the full 64-bit
    // value cannot be derived from them. Do not use beyond EQ/NE -- confirm
    // before reviving this encoding.
    // CMP $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s done -- skip the 2-byte low-word compare when hi words differ
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}
2657
  // Sign-extend a 32-bit int into a 64-bit register pair:
  // dst.lo = src; dst.hi = src >> 31 (arithmetic), i.e. all sign bits.
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31 -- C1 /7 is SAR r32,imm8
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}
2670
  // Convert a long to a double: push the 64-bit value onto the CPU stack,
  // FILD it onto the FPU stack, then pop the 8 bytes back off.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP] -- DF /5 disp8=0
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}
2686
  // Multiply and keep only high-order bits: IMUL produces a 64-bit product
  // in EDX:EAX, then EDX is shifted right by (cnt-32) to extract the bits.
  // cnt is constrained to [32,63]; when cnt==32 the shift count is zero and
  // no SAR is emitted (EDX already holds exactly bits 32..63).
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL EDX:EAX,$src1 -- F7 /5 is one-operand signed multiply
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}
2699
2700 // this version doesn't have add sp, 8
  // this version doesn't have add sp, 8
  // Same as convert_long_double but deliberately leaves the pushed 8 bytes
  // on the CPU stack; the consumer of this encoding is responsible for
  // releasing them.
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP] -- DF /5 disp8=0
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}
2712
  // Widening signed multiply: EDX:EAX = (long)EAX * (long)src.
  // Result register pair is pinned to EDX:EAX by the one-operand IMUL.
  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src -- F7 /5
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  // Widening unsigned multiply: EDX:EAX = (uint)EAX * (uint)src.
  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src -- F7 /4 is the unsigned form
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}
2726
  // Full 64x64->64 multiply with dst pinned to EDX:EAX (x in EDX:EAX,
  // y in $src pair). Cross products that would only affect bits >=64
  // are dropped.
  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL $tmp,EDX -- tmp = y_lo * x_hi (low 32 bits)
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL EDX,EAX -- EDX = y_hi * x_lo (low 32 bits)
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD $tmp,EDX -- tmp = sum of the two cross products
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL EDX:EAX,$src.lo -- widening x_lo * y_lo into EDX:EAX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD EDX,ESI -- fold the cross products into the high word
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}
2752
  // Multiply the long in EDX:EAX by a small constant (0..127, fits in imm8).
  // Same cross-product scheme as long_multiply, but the constant's high
  // word is zero so only one cross product survives.
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    // hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL $tmp,EDX,$src -- 6B is IMUL r32,r/m32,imm8: tmp = y_hi * src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV EDX,$src -- load the constant for the widening multiply
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL EDX:EAX,EDX -- EDX:EAX = y_lo * src (unsigned widening)
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD EDX,ESI -- fold the cross product into the high word
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}
2770
  // 64-bit signed divide: push both long operands and call the
  // SharedRuntime::ldiv helper, then pop the 16 bytes of arguments.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    // NOTE(review): HIGH_FROM_LOW is applied to the whole opcode byte here;
    // this is only equivalent to 0x50+HIGH_FROM_LOW(reg) because
    // HIGH_FROM_LOW is a simple additive offset -- confirm if that macro
    // ever changes.
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf, 0x50+$src1$$reg );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf, 0x50+$src2$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8); // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // 64-bit signed remainder: identical calling sequence to long_div above,
  // but targets SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf, 0x50+$src1$$reg );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf, 0x50+$src2$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8); // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack -- pop the 4 pushed words
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}
2808
  // Compare a long against zero for EQ/NE: OR the halves together;
  // ZF is set iff the whole 64-bit value is zero. Clobbers $tmp.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  // Compare two longs for EQ/NE only: compare lows; if they already
  // differ, skip the high compare (ZF is correctly clear either way).
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s skip -- emit_cc(0x70,0x5) == 0x75, short jump over 2 bytes
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}

  // Full signed long compare via multi-precision subtract:
  // CMP of the lows produces a borrow which SBB folds into the highs,
  // leaving signed-ordering flags for the 64-bit compare. Clobbers $tmp.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}

  // Signed compare of zero against a long (0 - src), same borrow-chain
  // technique as long_cmp_flags2. Clobbers $tmp.
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR $tmp,$tmp
    emit_opcode(cbuf,0x33); // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}
2853
2854 // Sniff, sniff... smells like Gnu Superoptimizer
  // Sniff, sniff... smells like Gnu Superoptimizer
  // Two's-complement negate of a 64-bit register pair:
  // NEG hi; NEG lo; SBB hi,0. NEG lo sets CF when lo != 0, and the SBB
  // subtracts that borrow from the (already negated) high word.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7); // NEG hi
    emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7); // NEG lo
    emit_rm (cbuf,0x3, 0x3, $dst$$reg );
    emit_opcode(cbuf,0x83); // SBB hi,0
    emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8 (cbuf,0 );
  %}
2864
  // POP EDX (opcode 0x5A) -- discard/consume one stack word into EDX.
  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);
  %}
2868
  // Tail-jump to the rethrow stub; relocated as a runtime call so the
  // target survives code movement.
  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9); // jmp entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}
2875
2876
2877 // Convert a double to an int. Java semantics require we do complex
2878 // manglelations in the corner cases. So we set the rounding mode to
2879 // 'zero', store the darned double down as an int, and reset the
2880 // rounding mode to 'nearest'. The hardware throws an exception which
2881 // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // thrown an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9); // FLDCW trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83); // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB); // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58); // POP EAX
    // 0x80000000 is the FISTP "invalid" sentinel produced for NaN and
    // out-of-range inputs; only then do we take the slow runtime call.
    emit_opcode(cbuf,0x3D); // CMP EAX,imm
    emit_d32 (cbuf,0x80000000); // 0x80000000
    emit_opcode(cbuf,0x75); // JNE around_slow_call
    emit_d8 (cbuf,0x07); // Size of slow_call -- FLD (2 bytes) + CALL (5 bytes)
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 ); // FLD ST(i)
    emit_d8 (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8); // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}
2924
  // Double-to-long conversion: same trunc-mode FISTP scheme as
  // DPR2I_encoding above, but storing 64 bits. The slow path is taken only
  // when the result equals the 64-bit sentinel 0x8000000000000000
  // (EDX==0x80000000 AND EAX==0), signalling NaN/out-of-range.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9); // FLDCW trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83); // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF); // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58); // POP EAX
    emit_opcode(cbuf,0x5A); // POP EDX
    emit_opcode(cbuf,0x81); // CMP EDX,imm
    emit_d8 (cbuf,0xFA); // rdx
    emit_d32 (cbuf,0x80000000); // 0x80000000
    emit_opcode(cbuf,0x75); // JNE around_slow_call
    emit_d8 (cbuf,0x07+4); // Size of slow_call -- also skips the TEST+JNE below
    emit_opcode(cbuf,0x85); // TEST EAX,EAX
    emit_opcode(cbuf,0xC0); // 2/rax,/rax,
    emit_opcode(cbuf,0x75); // JNE around_slow_call
    emit_d8 (cbuf,0x07); // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 ); // FLD ST(i)
    emit_d8 (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8); // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}
2966
  // FMUL ST,ST(i): multiply the FPU stack top by register src1 in place.
  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL ST,$src /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  // FADD ST,ST(i): add register src2 into the FPU stack top (no pop).
  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADDP ST,src2 /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP src2,fpST /* DE C0+i */
  %}

  // FADDP ST(i),ST: add stack top into src2 and pop the FPU stack.
  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP src2,ST /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  // FSUB then FDIV against the stack top: ST = (ST - src1) / src2-form.
  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB ST,$src1 /* D8 E0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV /* D8 F0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  // Fused add-then-multiply on the FPU stack top: ST = (ST + src1) * src2.
  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD ST,$src /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL ST,src2 /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  // As MulFAddF, but the multiply is the popping FMULP form, leaving the
  // product in src2 and popping the stack top.
  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD ST,$src /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP src2,ST /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}
3020
3021 // Atomically load the volatile long
  // Atomically load the volatile long
  // A 64-bit FILD from memory is atomic on x86; the value is then spilled
  // to the destination stack slot with a 64-bit FISTP.
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05; // DF /5 = FILD m64int
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); // DF /7 = FISTP m64int
  %}
3033
3034 // Volatile Store Long. Must be atomic, so move it into
3035 // the FP TOS and then do a 64-bit FIST. Has to probe the
3036 // target address before the store (for null-ptr checks)
3037 // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    // FILD the 64-bit source stack slot onto the FPU stack...
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop
    // ...then FISTP it atomically to the target memory location.
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07; // DF /7 = FISTP m64int
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}
3050
3051 %}
3052
3053
3054 //----------FRAME--------------------------------------------------------------
3055 // Definition of frame structure and management information.
3056 //
3057 // S T A C K L A Y O U T Allocators stack-slot number
3058 // | (to get allocators register number
3059 // G Owned by | | v add OptoReg::stack0())
3060 // r CALLER | |
3061 // o | +--------+ pad to even-align allocators stack-slot
3062 // w V | pad0 | numbers; owned by CALLER
3063 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
3064 // h ^ | in | 5
3065 // | | args | 4 Holes in incoming args owned by SELF
3066 // | | | | 3
3067 // | | +--------+
3068 // V | | old out| Empty on Intel, window on Sparc
3069 // | old |preserve| Must be even aligned.
3070 // | SP-+--------+----> Matcher::_old_SP, even aligned
3071 // | | in | 3 area for Intel ret address
3072 // Owned by |preserve| Empty on Sparc.
3073 // SELF +--------+
3074 // | | pad2 | 2 pad to align old SP
3075 // | +--------+ 1
3076 // | | locks | 0
3077 // | +--------+----> OptoReg::stack0(), even aligned
3078 // | | pad1 | 11 pad to align new SP
3079 // | +--------+
3080 // | | | 10
3081 // | | spills | 9 spills
3082 // V | | 8 (pad0 slot for callee)
3083 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
3084 // ^ | out | 7
3085 // | | args | 6 Holes in outgoing args owned by CALLEE
3086 // Owned by +--------+
3087 // CALLEE | new out| 6 Empty on Intel, window on Sparc
3088 // | new |preserve| Must be even-aligned.
3089 // | SP-+--------+----> Matcher::_new_SP, even aligned
3090 // | | |
3091 //
3092 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
3093 // known from SELF's arguments and the Java calling convention.
3094 // Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
// area, those holes are owned by SELF. Holes in the outgoing area
// are owned by the CALLEE. Holes should not be necessary in the
// incoming area, as the Java calling convention is completely under
// the control of the AD file. Doubles can be sorted and packed to
// avoid holes. Holes in the outgoing arguments may be necessary for
// varargs C calling conventions.
3102 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
3103 // even aligned with pad0 as needed.
3104 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
3105 // region 6-11 is even aligned; it may be padded out more so that
3106 // the region from SP to FP meets the minimum stack alignment.
3107
frame %{
  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX); // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C. Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address. Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    // lo/hi are indexed by ideal register type (Op_RegI..Op_RegL);
    // hi supplies the second register of a pair (long in EDX:EAX,
    // double in FPR1H:FPR1L), OptoReg::Bad for single-register values.
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // Same tables as c_return_value, but compiled Java code returns float in
  // XMM0 already with UseSSE>=1 (the C convention above requires SSE2).
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}
3174
3175 //----------ATTRIBUTES---------------------------------------------------------
3176 //----------Operand Attributes-------------------------------------------------
3177 op_attrib op_cost(0); // Required cost attribute
3178
3179 //----------Instruction Attributes---------------------------------------------
3180 ins_attrib ins_cost(100); // Required cost attribute
3181 ins_attrib ins_size(8); // Required size attribute (in bits)
3182 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3183 // non-matching short branch variant of some
3184 // long branch?
3185 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2)
3186 // specifies the alignment that some part of the instruction (not
3187 // necessarily the start) requires. If > 1, a compute_padding()
3188 // function must be provided for the instruction
3189
3190 //----------OPERANDS-----------------------------------------------------------
3191 // Operand definitions must precede instruction definitions for correct parsing
3192 // in the ADLC because operands constitute user defined types which are used in
3193 // instruction definitions.
3194
3195 //----------Simple Operands----------------------------------------------------
3196 // Immediate Operands
3197 // Integer Immediate
// Integer Immediate: any 32-bit int constant
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes (SIB scale field: 1,2,4,8 -> 0..3)
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Signed 8-bit immediate (fits the imm8 encoding of many instructions)
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned 8-bit immediate
operand immU8() %{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Signed 16-bit immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3271
// Int Immediate non-negative (fits in 31 bits)
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count that stays within one 32-bit word
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long shift count that crosses the 32-bit word boundary
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3348
// Pointer Immediate: any constant pointer
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}
3367
// Long Immediate: any 64-bit long constant
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one (NOTE: the "zero" header above this operand
// in older revisions was a copy-paste leftover)
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value representable as a sign-extended 32-bit int
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}
3427
// Double Immediate zero (x87 path, UseSSE<=1)
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one (x87 path)
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (x87 path)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 path)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero (SSE2 path); bit-pattern compare excludes -0.0
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero (x87 path, UseSSE == 0)
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one (x87 path)
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (x87 path)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE path)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0 (bit-pattern compare)
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3530
// Immediates for special shifts (sign extend)

// Constants for increment
// 16: shift count for short<->int sign extension
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// 24: shift count for byte<->int sign extension
operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3567
// Vector-mask register operands (presumably the AVX-512 opmask registers
// k1-k7 -- confirm against the register definition block): one operand for
// the general mask class and one per individual register.
operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K1()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K1));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K2()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K2));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K3()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K3));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K4()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K4));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K5()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K5));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K6()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K6));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K7()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K7));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}
3633
// Register Operands
// Integer Register: any allocatable 32-bit general-purpose register.
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register: only EAX/EBX/ECX/EDX (the int_x_reg class;
// presumably the registers with byte sub-registers — confirm against the
// register class definition).
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Fixed register EAX.
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Fixed register EBX.
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Fixed register ECX.
operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

// Fixed register EDX.
operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

// Fixed register EDI.
operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register excluding EAX (nax_reg class); the match list admits
// ECX/EDX/ESI/EDI.
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding EAX and EDX (nadx_reg class).
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding ECX (ncx_reg class).
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
// Fixed register ESI.
operand eSIRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(esi_reg));
   match(reg);
   match(rRegI);

   format %{ "ESI" %}
   interface(REG_INTER);
%}
3757
// Pointer Register: any register usable as a pointer, including those
// not normally allocatable (any_reg class).
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer in any allocatable general register.
operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// NOTE(review): definition is identical to eRegP; kept, presumably for
// naming symmetry with the 64-bit AD file — confirm before unifying.
operand rRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX (nax_reg class).
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX and EBX (nabx_reg class).
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register restricted to the p_reg class.
operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}
3846
// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

// Fixed pointer register EDX.
operand eDXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  format %{ "EDX" %}
  interface(REG_INTER);
%}

// Fixed pointer register ESI.
operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}
3893
// Long value held in a pair of 32-bit registers (long_reg class).
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

// Long fixed to the EDX:EAX pair (required by mul/div and similar ops).
operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

// Long fixed to the EBX:ECX pair.
operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply: only the low half (EAX) of the
// EDX:EAX pair is reported in the format.
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}
3927
// Flags register, used as output of compare instructions
// NOTE(review): rFlagsReg and eFlagsReg below are identical definitions;
// presumably both names exist for symmetry with x86_64 — confirm.
operand rFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// Unsigned-compare flags where the unordered case needs no fixup.
// predicate(false): never selected by the matcher directly — presumably
// only substituted explicitly by instruction rules; confirm with users
// of EFLAGS_U_CF elsewhere in this file.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}
4003
// Float register operands (x87 stack registers; only when SSE2 is not
// used for doubles, i.e. UseSSE < 2).
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

// Double fixed to x87 stack slot FPR1.
operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// Double fixed to x87 stack slot FPR2.
operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

// Double in any x87 register except FPR1 (fp_dbl_notreg0 class).
operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands (x87, UseSSE < 2)
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float fixed to x87 stack slot FPR1.
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}
4057
// XMM Float register operands (requires SSE).
// NOTE(review): regF and legRegF below are identical (both use
// float_reg_legacy); confirm whether regF was meant to use a wider
// (e.g. vl) register class as on other ports.
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float restricted to the legacy XMM registers.
operand legRegF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands (vl register class; no UseSSE predicate).
operand vlRegF() %{
   constraint(ALLOC_IN_RC(float_reg_vl));
   match(RegF);

   format %{ %}
   interface(REG_INTER);
%}

// XMM Double register operands (requires SSE2).
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double restricted to the legacy XMM registers.
operand legRegD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double register operands (vl register class; no UseSSE predicate).
operand vlRegD() %{
   constraint(ALLOC_IN_RC(double_reg_vl));
   match(RegD);

   format %{ %}
   interface(REG_INTER);
%}
4109
//----------Memory Operands----------------------------------------------------
// In the MEMORY_INTER descriptions below, index(0x4) encodes "no index
// register" (0x4 is ESP's encoding, which cannot be used as an index in a
// SIB byte — see the "No Index" comments on the stack-slot operands).

// Direct Memory Operand: absolute address from a pointer constant.
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand: [reg], no offset.
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand: [reg + imm8].
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand: [reg + imm32].
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand: pointer-constant base plus
// integer register (note the commuted match: AddP off reg).
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand: [reg + ireg + imm].
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand: [reg + ireg].
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}
4204
4205 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
4207 // // -------------------------------------------------------------------------
4208 // // Scaled Memory Operands
4209 // // Indirect Memory Times Scale Plus Offset Operand
4210 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4211 // match(AddP off (LShiftI ireg scale));
4212 //
4213 // op_cost(10);
4214 // format %{"[$off + $ireg << $scale]" %}
4215 // interface(MEMORY_INTER) %{
4216 // base(0x4);
4217 // index($ireg);
4218 // scale($scale);
4219 // disp($off);
4220 // %}
4221 // %}
4222
// Indirect Memory Times Scale Plus Index Register: [reg + ireg << scale].
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand:
// [reg + off + ireg << scale] — the full base+index*scale+disp form.
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}
4250
4251 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
4253 // the first word of the long. If the load-long destination overlaps with
4254 // registers used in the addressing expression, the 2nd half will be loaded
4255 // from a clobbered address. Fix this by requiring that load-long use
4256 // address registers that do not overlap with the load-long target.
4257
// load-long support: address register pinned to ESI so it cannot overlap
// the load-long destination pair (see the explanation above).
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long: [ESI], no offset.
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand: [ESI + imm32].
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Memory forms usable by the load-long idiom.
opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4296
4297
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding an int.
operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a float.
operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a double.
operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a long.
operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
4361
//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code, signed compare.  The hex values are the x86 Jcc/SETcc
// condition-code encodings for the mnemonic given in each string.
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case;
// restricted to the strict-inequality tests by the predicate.
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps;
// only eq/ne need the fixup.
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move.  The hex values here are FCMOVcc
// opcode fragments, not Jcc condition codes.  Overflow conditions are
// excluded by the predicate since FCMOV cannot express them.
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares: each condition is replaced by its
// operand-swapped counterpart (l <-> g, le <-> ge).
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares: operand-swapped unsigned
// conditions (b <-> nbe, be <-> nb).
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
4503
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

// All addressing modes usable by ordinary memory instructions.
opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
// Hence indOffset32X (oop-constant displacement) is excluded here.
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);
4519
4520
4521 //----------PIPELINE-----------------------------------------------------------
4522 // Rules which define the behavior of the target architectures pipeline.
4523 pipeline %{
4524
4525 //----------ATTRIBUTES---------------------------------------------------------
//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable-sized (x86)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
4536
//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline: six stages S0..S5.
pipe_desc(S0, S1, S2, S3, S4, S5);
4555
//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention:  ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation using big decoder
// NOTE(review): second parameter is declared 'memory src' although the
// class is named reg-reg — confirm whether rRegI was intended.
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation using big decoder
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store of an immediate to Memory
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}
4696
// Integer ALU0 reg-reg operation (ALU0 handles mul/div — see the resources
// comment above)
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg compare operation, writing the flags register
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm compare operation, writing the flags register
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem compare operation, writing the flags register
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}
4745
// Conditional move reg-reg (cmpLT idiom: 4 instructions)
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long (pair of moves, hence 2 decoders)
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg (FCMOV targets FPR1 only)
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}
4791
// Float reg-reg operation: single-operand x87 form, expands to two
// instructions and two decode slots.
pipe_class fpu_reg(regDPR dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);            // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);            // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation with two sources; occupies the FPU for two cycles.
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);            // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation with three sources.
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);            // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation with a memory source plus two register sources;
// the memory access needs the big decoder (D0) and the MEM unit.
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);            // any 3 decoders
    D0     : S0;               // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}
4842
// Float reg-mem operation: load from memory, then an FPU POP into dst.
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;               // big decoder only
    DECODE : S1;               // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;               // any mem
%}

// Float reg-mem operation with an extra register source.
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;               // big decoder only
    DECODE : S1(2);            // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;               // any mem
%}

// Float mem-reg operation: FPU PUSH of src followed by the memory store.
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;               // any decoder for FPU PUSH
    D0     : S1;               // big decoder only
    FPU    : S4;
    MEM    : S3;               // any mem
%}

// Float mem-reg operation with two register sources.
pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);            // any decoder for FPU PUSH
    D0     : S1;               // big decoder only
    FPU    : S4;
    MEM    : S3;               // any mem
%}

// Float mem-reg operation with an additional memory source; two memory
// accesses, hence two big-decoder slots and two MEM reservations.
pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;               // any decoder for FPU PUSH
    D0     : S0(2);            // big decoder only
    FPU    : S4;
    MEM    : S3(2);            // any mem
%}

// Float mem-mem move (two memory operands, no FPU computation stage reserved).
pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);            // big decoder only
    MEM    : S3(2);            // any mem
%}

// Float operation with three memory operands.
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);            // big decoder only
    FPU    : S4;
    MEM    : S3(3);            // any mem
%}

// Float mem-reg operation with a constant operand (loaded from memory).
pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;               // any decoder for FPU PUSH
    D0     : S0(2);            // big decoder only
    FPU    : S4;
    MEM    : S3(2);            // any mem
%}
4926
// Float load constant: memory load of the constant, then an FPU POP into dst.
pipe_class fpu_reg_con(regDPR dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;               // big decoder only for the load
    DECODE : S1;               // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;               // any mem
%}

// Float load constant combined with a register source.
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;               // big decoder only for the load
    DECODE : S1(2);            // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;               // any mem
%}
4947
// UnConditional branch: only the branch unit is needed.
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR   : S3;
%}

// Conditional branch: reads the flags early (S1) before the branch resolves.
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr   : S1(read);
    BR   : S3;
%}

// Allocation idiom (compare-and-exchange): serializing, fixed 6-cycle latency.
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}

// Generic big/slow expanded idiom: catch-all class for expensive expansions;
// serializing, modeled with a pessimistic fixed 100-cycle latency.
pipe_class pipe_slow( ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}
4991
4992 %}
4993
4994 //----------INSTRUCTIONS-------------------------------------------------------
4995 //
4996 // match -- States which machine-independent subtree may be replaced
4997 // by this instruction.
4998 // ins_cost -- The estimated cost of this instruction is used by instruction
4999 // selection to identify a minimum cost tree of machine
5000 // instructions that matches a tree of machine-independent
5001 // instructions.
5002 // format -- A string providing the disassembly for this instruction.
5003 // The value of an instruction's operand may be inserted
5004 // by referring to it with a '$' prefix.
5005 // opcode -- Three instruction opcodes may be provided. These are referred
5006 // to within an encode class as $primary, $secondary, and $tertiary
5007 // respectively. The primary opcode is commonly used to
5008 // indicate the type of machine instruction, while secondary
5009 // and tertiary are often used for prefix options or addressing
5010 // modes.
5011 // ins_encode -- A list of encode classes with parameters. The encode class
5012 // name must have been defined in an 'enc_class' specification
5013 // in the encode section of the architecture description.
5014
//----------BSWAP-Instruction--------------------------------------------------
// Reverse the byte order of a 32-bit register in place (ReverseBytesI).
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Reverse the byte order of a 64-bit value held in a register pair:
// byte-swap each half, then exchange the halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG  $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Reverse the bytes of an unsigned short: BSWAP moves the two interesting
// bytes to the top of the register, the logical shift brings them back
// down zero-extended. Clobbers the flags (SHR).
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR   $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Reverse the bytes of a signed short: same as above but the arithmetic
// shift (SAR) sign-extends the result. Clobbers the flags.
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR   $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}
5062
5063
//---------- Zeros Count Instructions ------------------------------------------

// Count leading zeros, int — hardware LZCNT form (guarded by
// UseCountLeadingZerosInstruction). Clobbers the flags.
instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Count leading zeros, int — BSR fallback when LZCNT is unavailable.
// BSR yields the index of the highest set bit (undefined for zero input,
// hence the JNZ/MOV -1 guard); NEG + ADD 31 converts that index into the
// leading-zero count (zero input maps to -(-1)+31 = 32).
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ    skip\n\t"
            "MOV    $dst, -1\n"
      "skip:\n\t"
            "NEG    $dst\n\t"
            "ADD    $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

// Count leading zeros, long — LZCNT form. LZCNT sets CF when its source is
// all zeros, so JNC falls through to the low word only when the high word
// was zero; in that case 32 is added for the zero high word.
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC    done\n\t"
            "LZCNT  $dst, $src.lo\n\t"
            "ADD    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Count leading zeros, long — BSR fallback. Scans the high word first;
// if it is zero, falls back to the low word. NEG + ADD 63 converts the
// bit index (biased by 32 for a hit in the high word) into the count.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ     msw_is_zero\n\t"
            "ADD    $dst, 32\n\t"
            "JMP    not_zero\n"
      "msw_is_zero:\n\t"
            "BSR    $dst, $src.lo\n\t"
            "JNZ    not_zero\n\t"
            "MOV    $dst, -1\n"
      "not_zero:\n\t"
            "NEG    $dst\n\t"
            "ADD    $dst, 63\n" %}
 ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}
5161
// Count trailing zeros, int — hardware TZCNT form. Clobbers the flags.
instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros, int — BSF fallback. BSF's result is undefined for a
// zero source (it sets ZF), so a zero input is patched to 32 explicitly.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ    done\n\t"
            "MOV    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros, long — TZCNT form. TZCNT sets CF when its source is
// all zeros; JNC falls through to the high word only when the low word was
// zero, in which case 32 is added for the zero low word.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC    done\n\t"
            "TZCNT  $dst, $src.hi\n\t"
            "ADD    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros, long — BSF fallback. Scans the low word; if it is
// zero, scans the high word (an all-zero input becomes 32 + 32 = 64), and a
// hit in the high word is biased by 32.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ    done\n\t"
            "BSF    $dst, $src.hi\n\t"
            "JNZ    msw_not_zero\n\t"
            "MOV    $dst, 32\n"
      "msw_not_zero:\n\t"
            "ADD    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}
5246
5247
//---------- Population Count Instructions -------------------------------------

// Population count, int register source (guarded by UsePopCountInstruction).
// POPCNT writes the flags, hence KILL cr.
instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Population count, int loaded directly from memory.
instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Count each 32-bit half of the register pair separately and sum.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Memory form: the two 32-bit halves live at $mem and $mem+4.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}
5309
5310
//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register.
// MOVSX8 sign-extends into dst.lo; the copy + arithmetic shift replicate
// the sign into dst.hi. A 7-bit shift suffices because after MOVSX8 the
// top 25 bits of dst.lo already equal the sign bit.
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register.
// Zero-extend into dst.lo and clear dst.hi (XOR clobbers the flags).
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register.
// Only the low 8 bits of the mask can matter after a zero-extended byte
// load, so the AND uses the mask truncated to 8 bits.
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}
5394
// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX  $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed).
// (x << 24) >> 24 of a loaded short is just a sign-extended byte load.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register.
// As with loadB2L, a 15-bit SAR suffices: after MOVSX the top 17 bits of
// dst.lo already equal the sign bit.
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed).
// (x << 24) >> 24 of a loaded char collapses to a sign-extended byte load.
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register.
// Zero-extend into dst.lo and clear dst.hi.
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register.
// The 0xFF mask lets the whole pattern collapse to a zero-extended byte load.
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register.
// Only the low 16 bits of the mask can matter after a zero-extended short
// load, so the AND uses the mask truncated to 16 bits.
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}
5514
// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed).
// (x << 24) >> 24 of a loaded int collapses to a sign-extended byte load.
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned).
// (x & 0xFF) of a loaded int collapses to a zero-extended byte load.
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed).
// (x << 16) >> 16 of a loaded int collapses to a sign-extended short load.
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned).
// (x & 0xFFFF) of a loaded int collapses to a zero-extended short load.
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
5576
// Load Integer into Long Register.
// Sign-extend by copying the low word to the high word and shifting in
// sign bits with SAR 31.
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register.
// The mask makes the result non-negative, so a zero-extended byte load and
// a cleared high word implement the ConvI2L directly.
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register.
// Same idea with a zero-extended short load.
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register.
// A 31-bit mask clears the sign bit, so the high word is simply zero and
// the mask is applied with an explicit AND.
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
// (matches (ConvI2L x) & 0xFFFFFFFF): low word from memory, high word zero.
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}
5659
// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI. Non-atomic form (two 32-bit loads, $mem and $mem+4);
// only selected when the load does not require atomic access.
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
            "MOV    $dst.hi,$mem+4" %}

  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side. Used when SSE2 is not available (UseSSE<=1).
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
            "FISTp  $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load to a stack slot via a single 64-bit XMM load
// (SSE2 path): MOVSD is atomic for the 8-byte access.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD  $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load directly into a GPR pair (SSE2 path):
// one atomic 64-bit XMM load, then split into lo/hi with MOVD + PSRLQ.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD   $dst.lo,$tmp\n\t"
            "PSRLQ  $tmp,32\n\t"
            "MOVD   $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
5725
// Load Range (array length): a plain 32-bit MOV from memory.
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer: 32-bit MOV (pointers are 32 bits on this target).
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer: same encoding as loadP.
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}
5759
// Load Float
// Move a float from a general XMM register class to the legacy-encodable
// class (register-to-register MOVSS; a no-op copy when src == dst).
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
// The reverse direction: legacy XMM class back to the general class.
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
// Same class-to-class move for doubles (MOVSD).
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
5799
// Load Double — x87 path (UseSSE<=1): FLD from memory then FSTP into the
// target x87 stack register.
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D  ST,$mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM — MOVSD form, used when the upper half of the XMM
// register may be cleared (UseXmmLoadAndClearUpper).
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD  $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Double to XMM — MOVLPD form (upper half preserved), selected when
// UseXmmLoadAndClearUpper is off.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS  $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float — x87 path (UseSSE==0): FLD then FSTP into the x87 register.
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S  ST,$mem\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
5863
// Load Effective Address — one LEA instruct per addressing-mode operand
// so each form gets its own match; all share opcode 0x8D.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// LEA with a 32-bit displacement.
instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// LEA with base + index + offset.
instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// LEA with base + scaled index.
instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// LEA with base + scaled index + offset.
instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}
5914
// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV    $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// XOR reg,reg is shorter than MOV reg,0 but clobbers EFLAGS, hence KILL cr.
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR    $dst,$dst" %}
  opcode(0x33);  /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

// Load pointer constant
instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV    $dst,$src" %}
  opcode(0xB8);  /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant: two 32-bit immediate moves, one per register half.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV    $dst.lo,$src.lo\n\t"
            "MOV    $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load long zero: XOR both halves (clobbers EFLAGS).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR    $dst.lo,$dst.lo\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}
5966
// x87 float constants. The general case loads from the constant table;
// 0.0 and 1.0 use the dedicated FLDZ/FLD1 instructions instead.

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}
6005
// The instruction usage is guarded by predicate in operand immF().
// SSE float constant: load from the constant table with MOVSS.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// float 0.0: XORPS the register with itself; cheaper than a table load.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS  $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
6027
// x87 double constants, mirroring the float versions above.

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}
6069
// The instruction usage is guarded by predicate in operand immD().
// SSE2 double constant: load from the constant table with MOVSD.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
// double 0.0: XORPD the register with itself; cheaper than a table load.
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD  $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
6091
// Load Stack Slot
// Loads from spill slots on the expression stack; int/pointer use plain
// MOV, long uses two MOVs (one per half), float/double go through the x87
// FLD/FSTP pair.
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV    $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV    $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S  $src\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D  $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
6150
// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// The variant chosen is selected by the AllocatePrefetchInstr flag:
//   3 -> PREFETCHW, 0 -> PREFETCHNTA, 1 -> PREFETCHT0, 2 -> PREFETCHT2.

// Without SSE (and unless PREFETCHW was requested) no prefetch is
// emitted at all — zero-size empty encoding.
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
6211
//----------Store Instructions-------------------------------------------------

// Store Byte
// Uses xRegI: a byte store needs a register with a byte-addressable
// encoding (EAX/EBX/ECX/EDX).
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8   $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// 16-bit MOV: 0x66 operand-size prefix followed by the 32-bit MOV opcode.
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16  $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV    $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}
6246
// Store Long
// Non-atomic two-instruction store of both 32-bit halves; only selected
// when the StoreL node does not require atomic access (volatile longs use
// the *_volatile forms below).
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV    $mem,$src.lo\n\t"
            "MOV    $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// L2I needs no conversion code on x86_32: just store the low half.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV    $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
6270
// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD   $src\n\t"
            "FISTp  $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: bounce the long through an XMM temp, whose 64-bit
// MOVSD store is atomic.  Source is already in a stack slot.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD  $tmp,$src\n\t"
            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant with the source in a GPR pair: assemble the 64-bit value in
// an XMM register (MOVD each half, PUNPCKLDQ to combine) and store it
// atomically with MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD   $tmp,$src.lo\n\t"
            "MOVD   $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
6323
// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV    $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
// MOV m32, imm32 (C7 /0).
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV    $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
// Guarded by UseStoreImmI16 since 16-bit immediate stores are slow on
// some CPUs (length-changing prefix).
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16  $mem,$src" %}
  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV    $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8   $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}
6391
// Store Double
// x87 path: source must already be on top of the FPU stack (regDPR1).
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D  $mem,$src" %}
  opcode(0xDD);       /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
// The 64-bit memory store itself performs the RoundDouble, so the same
// encoding matches the rounded form.
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D  $mem,$src\t# round" %}
  opcode(0xDD);       /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD  $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
6428
// Move double from a scalar XMM register class to the vector-legacy
// register class (register-to-register MOVSD).
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move double from the vector-legacy register class back to a scalar XMM
// register (register-to-register MOVSD).
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
6448
// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS  $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Move float from a scalar XMM register class to the vector-legacy
// register class (register-to-register MOVSS).
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move float from the vector-legacy register class back to a scalar XMM
// register (register-to-register MOVSS).
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
6481
// Store Float
// x87 path: source must be on top of the FPU stack (regFPR1).
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S  $mem,$src" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// The 32-bit memory store performs the RoundFloat itself.
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S  $mem,$src\t# round" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// A double rounded to float via ConvD2F can be stored directly: the
// 32-bit FST_S performs the narrowing.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S  $mem,$src\t# D-round" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV    $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV    $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}
6541
// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV    $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV    $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
// Two 32-bit stores, one per register half.
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}
6575
//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors
// On x86's TSO memory model, only the StoreLoad barrier needs actual code
// (a locked ADD to the stack); the other flavors are compile-time-only
// ordering constraints and emit nothing.

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full StoreLoad barrier: emitted as LOCK ADDL [ESP], 0 (see
// MacroAssembler::membar), which serializes without MFENCE.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elided StoreLoad barrier when a following locked instruction already
// provides the ordering (checked by Matcher::post_store_load_barrier).
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
6657
//----------Move Instructions--------------------------------------------------

// CastX2P pins source and destination to the same register (EAX), so the
// "conversion" is a pure no-op with an empty encoding.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P  $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

// CastP2X is a plain register copy.
instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV    $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}
6674
//----------Conditional Move---------------------------------------------------
// Conditional move
// Two families: jmov* emulates CMOV with a short branch on pre-P6 CPUs
// (predicate !supports_cmov), cmov* uses the real CMOVcc (0F 40+cc).

instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV    $dst,$src\n"
      "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV    $dst,$src\n"
      "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Unordered-compare flags variant expands to the unsigned form.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move with a memory source (CMOVcc r32, m32)
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
6770
// Conditional move of pointers; same CMOVcc / branch-emulation split as
// the integer versions above.
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note:  a CMoveP is generated for  stubs and native wrappers
//        regardless of whether we are on a P6, so we
//        emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop   skip\n\t"
          "MOV    $dst,$src\t# pointer\n"
      "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Unordered-compare flags variant expands to the unsigned form.
instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
6843
// Conditional move
// x87 FCMOVcc only exists for *unsigned* condition codes, so the
// unsigned forms use the real instruction (opcode 0xDA family) ...
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// ... while the signed forms are emulated with a branch around an FPU
// register-to-register copy (push src, store+pop into dst).
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOV    $dst,$src\t# double\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop    skip\n\t"
            "MOV    $dst,$src\t# float\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}
6891
6892 // No CMOVE with SSE/SSE2
6893 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6894 predicate (UseSSE>=1);
6895 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6896 ins_cost(200);
6897 format %{ "Jn$cop skip\n\t"
6898 "MOVSS $dst,$src\t# float\n"
6899 "skip:" %}
6900 ins_encode %{
6901 Label skip;
6902 // Invert sense of branch from sense of CMOV
6903 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6904 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6905 __ bind(skip);
6906 %}
6907 ins_pipe( pipe_slow );
6908 %}
6909
// No CMOVE with SSE/SSE2
// CMoveD in an XMM register: there is no conditional move for XMM operands,
// so branch (with inverted condition) around a MOVSD.
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  // Note: annotation fixed from "# float" — MOVSD moves a double.
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}
6927
// unsigned version
// CMoveF under an unsigned condition code; same branch-around-MOVSS idiom.
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// Carry-flag-only (UCF) unsigned compare variant; expands to fcmovF_regU.
instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}
6954
// unsigned version
// CMoveD under an unsigned condition code; branch-around-MOVSD idiom.
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  // Note: annotation fixed from "# float" — MOVSD moves a double.
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}
6972
// Carry-flag-only (UCF) unsigned compare variant; expands to fcmovD_regU.
instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long CMOV (signed condition): two 32-bit CMOVcc instructions, one per half
// of the long pair. Requires hardware CMOV support.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Long CMOV, unsigned condition codes.
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Carry-flag-only (UCF) variant; expands to cmovL_regU.
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
7012
7013 //----------Arithmetic Instructions--------------------------------------------
7014 //----------Addition Instructions----------------------------------------------
7015
7016 // Integer Addition Instructions
// Add register to register (dst += src).
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr); // ADD sets the condition codes

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Add immediate to register.
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  // Con8or32 emits either the 8-bit (sign-extended) or 32-bit immediate form.
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Increment by one: uses the one-byte INC reg (0x40+reg) encoding,
// only when UseIncDec allows INC/DEC.
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand integer add via LEA (note: no flags effect declared,
// LEA does not touch the condition codes).
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer variant of the three-operand LEA add.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}
7069
// Decrement by one: one-byte DEC reg (0x48+reg) encoding, gated on UseIncDec.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Add integer register to pointer.
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Add immediate to pointer.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}
7103
// Add a loaded value into a register (reg += [mem]).
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write add into memory ([mem] += reg).
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
// Read-modify-write add of an immediate into memory ([mem] += imm).
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Increment memory in place ([mem] += 1) using INC (FF /0).
instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// Decrement memory in place ([mem] += -1) using DEC (FF /1).
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}
7159
7160
// CheckCastPP: emits no code — the cast exists only for the type system.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastPP: zero-length placeholder; pointer value unchanged.
instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastII: zero-cost placeholder for an integer type narrowing.
instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// CastLL: zero-cost placeholder for a long type narrowing.
instruct castLL( eRegL dst ) %{
  match(Set dst (CastLL dst));
  format %{ "#castLL of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// CastFF for XMM float registers (SSE).
instruct castFF( regF dst ) %{
  predicate(UseSSE >= 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// CastDD for XMM double registers (SSE2).
instruct castDD( regD dst ) %{
  predicate(UseSSE >= 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// CastFF for x87 float registers (no SSE).
instruct castFF_PR( regFPR dst ) %{
  predicate(UseSSE < 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// CastDD for x87 double registers (no SSE2).
instruct castDD_PR( regDPR dst ) %{
  predicate(UseSSE < 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}
7228
// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  // LOCK 0F B1 /r — locked CMPXCHG r/m32,r32; only the resulting flags are used.
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval); // CMPXCHG overwrites EAX with the memory value on failure
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}
7261
// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
// newval is constrained to EBX:ECX, but CMPXCHG8B wants the new value in
// ECX:EBX, hence the XCHG bracketing below.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}
7283
// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS via LOCK CMPXCHG8B: needs cx8 hardware support and the fixed
// EDX:EAX (expected) / ECX:EBX (new) register pairs. $res is materialized
// from the resulting ZF by enc_flags_ne_to_boolean.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS via LOCK CMPXCHG; expected value fixed in EAX, new value in ECX.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte CAS via LOCK CMPXCHGB.
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Short (16-bit) CAS via LOCK CMPXCHGW.
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int CAS via LOCK CMPXCHG.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}
7354
// CompareAndExchange variants: unlike CompareAndSwap, the result is the
// witnessed old value itself — CMPXCHG leaves it in the oldval register,
// so no flag-to-boolean materialization is emitted.

// 64-bit compare-and-exchange via LOCK CMPXCHG8B (needs cx8).
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer compare-and-exchange.
instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte compare-and-exchange.
instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Short (16-bit) compare-and-exchange.
instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Int compare-and-exchange.
instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}
7395
// GetAndAddB whose result is unused: a plain locked ADD suffices,
// avoiding the XADD register round-trip.
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
// Atomic fetch-and-add of a byte: old value returned in $newval.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddS whose result is unused: plain locked 16-bit ADD.
instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDS [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add of a short: old value returned in $newval.
instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDS [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddI whose result is unused: plain locked 32-bit ADD.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add of an int: old value returned in $newval.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}
7465
// Atomic GetAndSet via XCHG. XCHG with a memory operand is implicitly
// locked and does not modify flags, hence no lock prefix or cr effect.

// Important to match to xRegI: only 8-bit regs.
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic GetAndSet of a short.
instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic GetAndSet of an int.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic GetAndSet of a pointer (32-bit XCHG).
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
7502
7503 //----------Subtraction Instructions-------------------------------------------
7504
7505 // Integer Subtraction Instructions
// Subtract register from register (dst -= src).
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr); // SUB sets the condition codes

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract immediate from register.
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract a loaded value from a register (reg -= [mem]).
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write subtract into memory ([mem] -= reg).
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}
7549
// Subtract from a pointer
// Matches (AddP dst (SubI 0 src)), i.e. dst += (0 - src), as a single SUB.
instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Integer negation: (SubI 0 dst) as a one-instruction NEG.
instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}
7572
7573 //----------Multiplication/Division Instructions-------------------------------
7574 // Integer Multiplication Instructions
7575 // Multiply Register
// Two-operand IMUL, register * register (0F AF /r; primary/secondary bytes
// are listed in reverse and emitted via OpcS then OpcP).
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
// Three-operand IMUL: dst = src * imm.
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low word (EAX) of an EDX:EAX pair;
// feeder for the multiply-high patterns below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}
7611
// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Walks the matched ideal subtree to require that the MulL's second input
  // is a long constant (Op_ConL) whose value fits in a signed 32-bit int.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// General shift counts 32..63: an extra SAR of EDX is needed after the IMUL.
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Same constraint as mulI_imm_high: constant multiplier within int range.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}
7643
// Multiply Memory 32-bit Immediate
// dst = [mem] * imm via three-operand IMUL with a memory source.
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
// dst *= [mem] via two-operand IMUL (0F AF /r).
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Fused multiply-add-sum: dst = dst*src1 + src2*src3, built purely by
// expansion into two IMULs and an ADD (src2 is clobbered by the second IMUL).
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
            mulI_eReg(src2, src3, cr);
            addI_eReg(dst, src2, cr); %}
%}
7677
// Multiply Register Int to Long
// Signed widening multiply: one-operand IMUL leaves the 64-bit product
// in the fixed EDX:EAX pair.
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned widening multiply: both inputs masked to 32 bits, so the
// one-operand MUL (unsigned) computes the exact 64-bit product.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}
7702
// Multiply Register Long
// General 64x64->64 multiply composed from three 32-bit multiplies;
// dst is the fixed EDX:EAX pair.
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
// One cross-term (x_hi * y_lo) drops out, saving a multiply.
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
// hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
// The other cross-term (x_lo * y_hi) drops out.
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
7762
// Multiply Register Long where the left and the right operands' high 32 bits are zero
// Both cross-terms vanish: a single unsigned MUL produces the full result.
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
// hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
// Constant fits in 8 bits (immL_127), so the high-word partial product
// needs only a single IMUL-by-immediate.
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
// Basic idea: lo(result) = lo(src * EAX)
// hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}
7793
// Integer DIV with Register
// Special-cases EAX == min_jint with divisor -1 (see the CMP/JE path in the
// format), which would otherwise raise a #DE overflow fault from IDIV.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr); // IDIV leaves the remainder in EDX
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// No inline sequence: calls out to the SharedRuntime::ldiv helper.
instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (DivL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}
7827
// Integer DIVMOD with Register, both quotient and mod results
// Single IDIV yields quotient in EAX and remainder in EDX; same min_jint/-1
// overflow guard as divI_eReg.
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
// Remainder only: result taken from EDX, quotient in EAX discarded (KILL rax).
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// No inline sequence: calls out to the SharedRuntime::lrem helper.
instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (ModL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}
7875
7876 // Divide Register Long (no special case since divisor != -1)
// Divide Register Long (no special case since divisor != -1)
// 64-bit / 32-bit immediate divide done inline with two chained 32-bit
// unsigned DIVs (schoolbook long division), avoiding the runtime call.
// Negative dividends are negated first and the result negated back; a
// negative immediate is handled by the final conditional lneg.
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR    $tmp2,$tmp2\n\t"
            "CMP    $tmp,EDX\n\t"
            "JA,s   fast\n\t"
            "MOV    $tmp2,EAX\n\t"
            "MOV    EAX,EDX\n\t"
            "MOV    EDX,0\n\t"
            "JLE,s  pos\n\t"
            "LNEG   EAX : $tmp2\n\t"
            "DIV    $tmp # unsigned division\n\t"
            "XCHG   EAX,$tmp2\n\t"
            "DIV    $tmp\n\t"
            "LNEG   $tmp2 : EAX\n\t"
            "JMP,s  done\n"
    "pos:\n\t"
            "DIV    $tmp\n\t"
            "XCHG   EAX,$tmp2\n"
    "fast:\n\t"
            "DIV    $tmp\n"
    "done:\n\t"
            "MOV    EDX,$tmp2\n\t"
            "NEG    EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;  // |con|: divide unsigned, fix sign at the end
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    // If |divisor| > hi word, the quotient fits in 32 bits: take the fast path.
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    // Sign test still reflects the cmpl above (the movls preserve flags).
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);       // first DIV: hi word -> hi quotient, remainder in EDX
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);       // second DIV: remainder:lo word -> lo quotient
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      // Negative immediate: negate the whole 64-bit quotient.
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}
7946
7947 // Remainder Register Long (remainder fit into 32 bits)
// Remainder Register Long (remainder fit into 32 bits)
// Inline 64-bit % 32-bit immediate, same two-step unsigned DIV scheme as
// divL_eReg_imm32 but keeping the remainder (EDX after the final DIV).
// The remainder takes the sign of the dividend; the final SAR sign-extends
// the 32-bit remainder into the hi word.
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP    $tmp,EDX\n\t"
            "JA,s   fast\n\t"
            "MOV    $tmp2,EAX\n\t"
            "MOV    EAX,EDX\n\t"
            "MOV    EDX,0\n\t"
            "JLE,s  pos\n\t"
            "LNEG   EAX : $tmp2\n\t"
            "DIV    $tmp # unsigned division\n\t"
            "MOV    EAX,$tmp2\n\t"
            "DIV    $tmp\n\t"
            "NEG    EDX\n\t"
            "JMP,s  done\n"
    "pos:\n\t"
            "DIV    $tmp\n\t"
            "MOV    EAX,$tmp2\n"
    "fast:\n\t"
            "DIV    $tmp\n"
    "done:\n\t"
            "MOV    EAX,EDX\n\t"
            "SAR    EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;  // remainder sign follows dividend, so |con| suffices
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    // If |divisor| > hi word, one DIV of the full value suffices.
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);   // hi word first; remainder carries into next DIV via EDX
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    // Move 32-bit remainder to lo word, then sign-extend it into the hi word.
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}
8013
8014 // Integer Shift Instructions
8015 // Shift Left by one
// Shift Left by one (dedicated short D1 /4 encoding).
instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL    $dst,$shift" %}
  opcode(0xD1, 0x4);  /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL    $dst,$shift" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable (count implicitly in CL, hence the eCXRegI operand)
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL    $dst,$shift" %}
  opcode(0xD3, 0x4);  /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}
8050
8051 // Arithmetic shift right by one
// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR    $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one, directly on a memory operand
// (read-modify-write form of the StoreI(RShiftI(LoadI)) pattern).
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR    $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR    $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate, memory form
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR    $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable (count in CL)
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR    $dst,$shift" %}
  opcode(0xD3, 0x7);  /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}
8107
8108 // Logical shift right by one
// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR    $dst,$shift" %}
  opcode(0xD1, 0x5);  /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR    $dst,$shift" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}
8131
8132
8133 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
8134 // This idiom is used by the compiler for the i2b bytecode.
// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
// Recognized as a single sign-extending byte move (MOVSX).
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX  $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
// Recognized as a single sign-extending word move (MOVSX).
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX  $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
8158
8159
8160 // Logical Shift Right by variable
// Logical Shift Right by variable (count in CL)
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR    $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}
8171
8172
8173 //----------Logical Instructions-----------------------------------------------
8174 //----------Integer Logical Instructions---------------------------------------
8175 // And Instructions
8176 // And Register with Register
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND    $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate (OpcSErm/Con8or32 pick the short sign-extended
// imm8 form when the constant fits in a byte)
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND    $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND    $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register (read-modify-write)
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND    $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate (read-modify-write)
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND    $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
8236
8237 // BMI1 instructions
// BMI1 instructions (guarded by UseBMI1Instructions):
//   ANDN   dst = ~src1 & src2      matches AndI(XorI src -1) ...
//   BLSI   dst = -src & src        (isolate lowest set bit)
//   BLSMSK dst = (src-1) ^ src     (mask up to lowest set bit)
//   BLSR   dst = (src-1) & src     (reset lowest set bit)
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL  $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL  $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
8353
8354 // Or Instructions
8355 // Or Register with Register
// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR     $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer reinterpreted as an int (CastP2X).
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR     $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR     $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR     $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register (read-modify-write)
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR     $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate (read-modify-write)
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR     $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
8427
8428 // ROL/ROR
8429 // ROL expand
// ROL/ROR
// ROL expand — encoding-only helpers (no match rule); instantiated by the
// rotate match rules below via expand %{ %}.
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL    $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1  /0  */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

// Variable rotate: count in CL, so dst must avoid ECX (ncxRegI).
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL    $dst, $shift" %}
  opcode(0xD3, 0x0);    /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once: (x << 1) | (x >>> 31)   (immI_M1 masks to 31)
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once: only when the two shift counts sum to 32 mod 32.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once: (x << s) | (x >>> (0 - s))
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once: (x << s) | (x >>> (32 - s))
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}
8494
8495 // ROR expand
// ROR expand — encoding-only helpers, mirror of the ROL expand set above.
instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR    $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR    $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

// Variable rotate: count in CL, so dst must avoid ECX (ncxRegI).
instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR    $dst, $shift" %}
  opcode(0xD3, 0x1);    /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once: (x >>> 1) | (x << 31)   (immI_M1 masks to 31)
instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once: only when the two shift counts sum to 32 mod 32.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once: (x >>> s) | (x << (0 - s))
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once: (x >>> s) | (x << (32 - s))
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}
8560
8561 // Xor Instructions
8562 // Xor Register with Register
// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR    $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1: strength-reduced to NOT.
// Note: no KILL cr — NOT does not modify flags, unlike XOR.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT    $dst" %}
  ins_encode %{
     __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR    $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR    $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register (read-modify-write)
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR    $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate (read-modify-write)
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR    $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
8633
8634 //----------Convert Int to Boolean---------------------------------------------
8635
// Conv2B: normalize a value to 0/1.  convI2B/convP2B expand into a plain
// copy followed by the NEG/ADC trick: NEG sets carry iff dst != 0, then
// ADC dst(=0 after borrow chain),src leaves exactly 0 or 1.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV    $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG    $dst\n\t"
            "ADC    $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer variant of the same expansion (eRegP source).
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV    $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG    $dst\n\t"
            "ADC    $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}
8687
// CmpLTMask: dst = (p < q) ? -1 : 0, computed branch-free via SETlt + NEG.
// dst is constrained to eCXRegI because SETcc needs a byte-addressable
// register (low byte of EAX/EBX/ECX/EDX); see comment below.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR    $dst,$dst\n\t"
            "CMP    $p,$q\n\t"
            "SETlt  $dst\n\t"
            "NEG    $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    __ xorl(Rd, Rd);            // clear before SETcc (which writes only the low byte)
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);                // 1 -> -1 (all ones), 0 -> 0
  %}

  ins_pipe(pipe_slow);
%}
8711
// CmpLTMask against zero: (x < 0) ? -1 : 0 is just an arithmetic shift of
// the sign bit across the whole register.
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR    $dst,31\t# cmpLTMask0" %}
  ins_encode %{
  __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}
8723
8724 /* better to save a register than avoid a branch */
/* better to save a register than avoid a branch */
// p = (p - q) + ((p - q < 0) ? y : 0), done with SUB + conditional ADD
// instead of materializing the mask.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE    done\n\t"
            "ADD    $p,$y\n"
            "done:  " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);  // p - q >= 0: mask is 0, nothing to add
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// y = (p < q) ? y : 0 — keep y when the mask is all-ones, else zero it.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL   $p, $q\t# and_cmpLTMask\n\t"
            "JLT    done\n\t"
            "XORL   $y, $y\n"
            "done:  " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);  // p < q: mask is all-ones, y unchanged
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}
8771
8772 /* If I enable this, I encourage spilling in the inner loop of compress.
8773 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8774 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8775 */
8776 //----------Overflow Math Instructions-----------------------------------------
8777
// Overflow math: each form produces only the flags result (Set cr) for a
// following overflow branch; forms that destroy an input mark it USE_KILL.
instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD    $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD    $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtraction overflow needs only the flags, so CMP (non-destructive SUB)
// suffices — no USE_KILL.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 0 - x overflow check via NEG (destroys op2, hence USE_KILL).
instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG    $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL   $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Immediate-multiply form writes the product into a TEMP so both inputs
// survive; only the flags are the architected result.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL   $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
8861
8862 // Integer Absolute Instructions
// Integer Absolute Instructions
// Branch-free abs: tmp = sign mask (x >> 31, all 0s or all 1s), then
// (x ^ mask) - mask flips and adjusts only when x was negative.
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
          %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);
    __ subl($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
8883
8884 //----------Long Instructions------------------------------------------------
8885 // Add Long Register with Register
// Add Long Register with Register
// 64-bit add as ADD on the low words plus ADC (add-with-carry) on the
// high words; the carry links the two halves, hence KILL cr.
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD    $dst.lo,$src.lo\n\t"
            "ADC    $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD    $dst.lo,$src.lo\n\t"
            "ADC    $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory (hi word loaded from $mem+4)
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD    $dst.lo,$mem\n\t"
            "ADC    $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
8919
// Subtract Long Register with Register.
// SUB produces the borrow that SBB consumes for the high half.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);   // SUB r,r/m; SBB r,r/m
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory (low at $mem, high at $mem+4)
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Long negate: 0 - dst, done as NEG hi / NEG lo / SBB hi,0 so the
// borrow out of the low half is propagated into the high half.
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}
8963
// And Long Register with Register
// Bitwise ops have no cross-half carry: the two halves are independent.
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);   // same AND opcode for both halves
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory (low at $mem, high at $mem+4)
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
8997
// BMI1 instructions

// ANDN long: matches (~src1) & src2, i.e. (XorL src1 -1) AndL src2.
// TEMP dst: the low half of dst is written before the high halves of the
// inputs are read, so dst must not be allocated on top of an input pair.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
         %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    // HIGH_FROM_LOW maps the low-word register to its pair's high word.
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

// ANDN long with memory second operand; high word loaded from $src2+4.
instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
         %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // Address of the high 32 bits: same base/index/scale, displacement + 4.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}
9038
// BLSI long: isolate lowest set bit, matches (0 - src) & src.
// The high half is pre-zeroed; if the low-half BLSI produced a nonzero
// result (ZF clear) the lowest set bit was in the low word and the high
// word of the result is 0, so the high-half BLSI is skipped.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);   // lowest set bit found in low word
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// BLSI long, memory operand (low at $src, high at $src+4).
instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}
9090
// BLSMSK long: mask up to and including lowest set bit,
// matches (src + (-1)) ^ src.  BLSMSK sets CF when its source is zero;
// if CF is clear after the low half, the mask ends in the low word and
// the pre-zeroed high word is already correct.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);   // low word was nonzero
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

// BLSMSK long, memory operand (low at $src, high at $src+4).
instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}
9146
// BLSR long: reset lowest set bit, matches (src + (-1)) & src.
// BLSR sets CF when its source is zero; the high word is pre-copied from
// src, and only if the low word was zero (CF set) does the lowest set bit
// live in the high word, which is then BLSR'd.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));   // high word unchanged by default
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);   // bit cleared in low word
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

// BLSR long, memory operand (low at $src, high at $src+4).
instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}
9201
// Or Long Register with Register
// Bitwise OR: halves are independent, no carry between them.
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);   // same OR opcode for both halves
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory (low at $mem, high at $mem+4)
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
9235
// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);   // same XOR opcode for both halves
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// XOR with -1 is a bitwise complement: use NOT on each half.
// NOT does not modify EFLAGS, so no KILL cr is needed here.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory (low at $mem, high at $mem+4)
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
9281
// Shift Left Long by 1
// A 64-bit left shift by one is ADD lo,lo / ADC hi,hi: the carry out of
// the low-half add is the bit shifted into the high half.  Guarded by
// UseNewLongLShift; costed below the generic SHLD sequence so it wins.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2: two ADD/ADC doublings.
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3: three ADD/ADC doublings.
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}
9338
// Shift Left Long by 1-31
// SHLD shifts bits from the low word into the high word, then SHL
// finishes the low word.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
// Whole low word moves into the high word, is shifted by cnt-32,
// and the low word becomes zero.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable count in CL.
// Tests bit 5 of the count at runtime to pick the >=32 path, then runs
// the SHLD/SHL pair (which uses count mod 32).
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9379
// Shift Right Long (logical, URShiftL) by 1-31
// SHRD shifts bits from the high word into the low word, then SHR
// finishes the high word with zero fill.
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long (logical) by 32-63: high word moves down, is shifted
// by cnt-32, and the high word is zeroed.
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long (logical) by variable count in CL; runtime test of
// bit 5 selects the >=32 path before the SHRD/SHR pair.
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9420
// Shift Right Long (arithmetic, RShiftL) by 1-31
// Same SHRD low-word step as the logical shift; SAR on the high word
// replicates the sign bit instead of zero-filling.
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long (arithmetic) by 32-63: high word moves down and is
// shifted by cnt-32; SAR hi,31 fills the high word with the sign.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable count in CL.
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9461
9462
//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
// FUCOMIP writes ZF/PF/CF directly (P6+).  PF set means unordered (NaN);
// the fixup forces CF so an unordered compare behaves as "less than".
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);   // fixup path writes AH then SAHF
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Same compare for consumers that only read CF (eFlagsRegUCF):
// no NaN fixup needed, so rax is untouched.
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}
9498
// Compare & branch
// Pre-P6 x87 compare: FCOM sets the FPU status word, which must be moved
// to AX (FNSTSW) and loaded into EFLAGS (SAHF); unordered results are
// forced to "less than".  KILLs rax for the status-word transfer.
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1 (CmpD3): FTST compares ST(0) against +0.0.
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 (CmpD3) via x87 status word.
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
9546
// float compare and set condition codes in EFLAGS by XMM regs
// UCOMISD sets ZF/PF/CF; PF flags an unordered (NaN) compare, and
// emit_cmpfp_fixup() then forces CF so NaN compares as "less than".
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Carry-flag-only consumers (eFlagsRegUCF) need no NaN fixup.
instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand variant of cmpD_cc.
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand, carry-flag-only variant (no NaN fixup).
instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}
9604
// Compare into -1,0,1 in XMM
// emit_cmpfp3() materializes the CmpD3 three-way result (-1/0/1) in $dst
// from the UCOMISD flags; unordered falls into the -1 (less) case.
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
9644
9645
// x87 double subtract: push src, then FSUBP into dst (pops the stack).
instruct subDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Subtract with an explicit store-and-round to a stack slot
// (RoundDouble): FSTP_D narrows the 80-bit x87 result to 64 bits.
instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DSUB ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// x87 double subtract with a memory operand.
instruct subDPR_reg_mem(regDPR dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
9686
// x87 absolute value: FABS operates in place on the top of the FP stack,
// hence the regDPR1 (ST(0)) constraint on both dst and src.
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// x87 negate: FCHS flips the sign of ST(0) in place.
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
9706
// x87 double add: push src, FADDP into dst.
instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Add with store-and-round to a stack slot (RoundDouble):
// FSTP_D narrows the 80-bit x87 result to a 64-bit double.
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DADD ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// x87 double add with a memory operand.
instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DADDp $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory
// Load, add, store back: matches the StoreD(RoundDouble(AddD(LoadD,src)))
// tree so the whole read-modify-write collapses to one rule.
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D $dst\n\t"
            "DADD ST,$src\n\t"
            "FST_D $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}
9765
// Add the constant 1.0: FLD1 avoids a constant-table load.
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// Add a general double constant from the constant table.
// The predicate excludes 0.0 and 1.0, which have cheaper dedicated rules.
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// Add a double constant with store-and-round to a stack slot.
instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));   // store + round to 64-bit
  %}
  ins_pipe(fpu_mem_reg_con);
%}
9806
// x87 double multiply: push src, FMULP into dst.
instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (MulD dst src));
  ins_cost(1); // Select this instruction for all FP double multiplies

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}
9845
// Multiply by a double constant from the constant table.
// The predicate excludes 0.0 and 1.0, which are handled elsewhere.
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}


// x87 double multiply with a memory operand.
instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
// Three-operand form: load mem, multiply by src, pop into dst.
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
9887
9888
// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
// Computes src0*src1 - src2 into src2 (FSUBRP: ST(i) = ST(0) - ST(i)).
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}
9921
9922
// x87 double divide: push src, FDIVP into dst.
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
9935
9936 // Strict FP instruction biases argument before division then
9937 // biases result, to avoid double rounding of subnormals.
9938 //
9939 // scale dividend by multiplying dividend by 2^(-15360)
9940 // load divisor
9941 // divide scaled dividend by divisor
9942 // rescale quotient by 2^(15360)
9943 //
// Strict-FP double divide. dst must be in ST(0) (regDPR1); src must not be.
// Biases the dividend by 2^(-15360) before dividing and rescales the quotient
// by 2^(15360) afterwards to avoid double rounding of subnormal results.
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // Merged the two original predicate clauses into one: an instruct takes a
  // single predicate, so the stand-alone "UseSSE<=1" clause was shadowed by
  // this one (which already implies it). Semantics are unchanged.
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(01);  // NB: octal literal, value 1 — deliberately near-free so this
                 // form wins over divDPR_reg when the predicate allows it

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),    // scale dividend down (2^-15360)
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),      // FDIVP dst,ST
              strictfp_bias2(dst) );  // scale quotient back up (2^15360)
  ins_pipe( fpu_reg_reg );
%}
9963
// Double remainder (Java % semantics) via x87 FPREM loop; see emitModDPR().
// EAX and EFLAGS are clobbered by the FNSTSW AX / SAHF status polling.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),  // arrange divisor/dividend on the FPU stack
             emitModDPR(),                // FPREM loop until reduction complete
             Push_Result_Mod_DPR(src),    // discard leftover divisor, keep remainder
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}
9977
// Double remainder for the SSE2 path: XMM operands are spilled to the stack,
// run through the x87 FPREM loop (XMM has no remainder instruction), and the
// result is reloaded into an XMM register.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);  // FNSTSW AX / SAHF status polling in the loop

  format %{ "SUB ESP,8\t # DMOD\n"
          "\tMOVSD [ESP+0],$src1\n"
          "\tFLD_D [ESP+0]\n"
          "\tMOVSD [ESP+0],$src0\n"
          "\tFLD_D [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD $dst,[ESP+0]\n"
          "\tADD ESP,8\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}
10002
// Two-argument arctangent (AtanD) on the x87 stack via FPATAN (D9 F3).
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}
10012
// AtanD for the SSE2 path: spill XMM args to the stack, compute with x87
// FPATAN (no XMM equivalent), reload the result into XMM.
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}
10023
// Double square root on the x87 stack via FSQRT (D9 FA).
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}
10033
10034 //-------------Float Instructions-------------------------------
10035 // Float Math
10036
10037 // Code for float compare:
10038 // fcompp();
10039 // fwait(); fnstsw_ax();
10040 // sahf();
10041 // movl(dst, unordered_result);
10042 // jcc(Assembler::parity, exit);
10043 // movl(dst, less_result);
10044 // jcc(Assembler::below, exit);
10045 // movl(dst, equal_result);
10046 // jcc(Assembler::equal, exit);
10047 // movl(dst, greater_result);
10048 // exit:
10049
10050 // P6 version of float compare, sets condition codes in EFLAGS
// x87 float compare using the P6 FUCOMIP, which sets EFLAGS directly.
// A NaN raises PF; the fixup rewrites flags so unordered compares as "less".
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);  // AH is used to rebuild flags on the NaN path
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
     "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}
10068
// Same FUCOMIP compare, but producing eFlagsRegUCF: the consumer tolerates
// the unordered (PF) outcome, so no NaN fixup is emitted — hence cheaper.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}
10080
10081
10082 // Compare & branch
// Pre-P6 x87 float compare: FCOMP sets the FPU status word, which is copied
// to EFLAGS via FNSTSW AX / SAHF. Unordered (NaN) is forced to "less than".
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);  // AX carries the FPU status word
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
    "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}
10101
10102 // Compare vs zero into -1,0,1
// Three-way float compare against 0.0 (CmpF3): uses FTST, then materializes
// -1/0/1 in an integer register via CmpF_Result.
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);  // status-word shuffle through AX/EFLAGS
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);  // FTST is D9 E4
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
10115
10116 // Compare into -1,0,1
// Three-way float compare (CmpF3): FCOMP then -1/0/1 into an integer register.
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);  // status-word shuffle through AX/EFLAGS
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
10129
10130 // float compare and set condition codes in EFLAGS by XMM regs
// SSE float compare: UCOMISS sets EFLAGS; emit_cmpfp_fixup patches the
// unordered (NaN, PF=1) case so it reads as "less than" (CF set).
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
    "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}
10147
// SSE float compare without the NaN fixup: the eFlagsRegUCF consumer handles
// the unordered outcome itself, so a bare UCOMISS suffices (lower cost).
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
10158
10159 // float compare and set condition codes in EFLAGS by XMM regs
// SSE float compare with the right operand folded from memory (cisc form of
// cmpF_cc); same NaN fixup to make unordered read as "less than".
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
    "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}
10176
// Memory-operand variant of cmpF_ccCF: bare UCOMISS, consumer handles NaN.
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}
10187
10188 // Compare into -1,0,1 in XMM
// Three-way SSE float compare (CmpF3): UCOMISS then emit_cmpfp3 materializes
// -1 (less or unordered), 0 (equal), or 1 (greater) in dst.
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10207
10208 // Compare into -1,0,1 in XMM and memory
// Memory-operand variant of cmpF_reg: three-way compare with src2 loaded
// directly from memory.
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10227
10228 // Spill to obtain 24-bit precision
// Float subtract with rounding to 24-bit precision: the result is stored to a
// stack slot (FSTP_S), which forces the single-precision round.
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );  // FSTP_S to the stack slot rounds to 24 bits
  ins_pipe( fpu_mem_reg_reg );
%}
10240 //
10241 // This instruction does not round to 24-bits
// Float subtract, register form (no 24-bit spill): dst = dst - src.
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );  // FSUBRP dst,ST — reversed because src is on top
  ins_pipe( fpu_reg_reg );
%}
10252
10253 // Spill to obtain 24-bit precision
// Float add with rounding to 24-bit precision via a stack-slot store.
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),  // add is commutative, so pushing src2 first is fine
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );  // FSTP_S rounds to 24 bits
  ins_pipe( fpu_mem_reg_reg );
%}
10265 //
10266 // This instruction does not round to 24-bits
// Float add, register form (no 24-bit spill): dst = dst + src.
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );  // FADDP dst,ST then pop
  ins_pipe( fpu_reg_reg );
%}
10278
// Float absolute value. Operands are pinned to ST(0) (regFPR1), so a bare
// FABS (D9 E1) on the top of stack suffices — no pushes or pops.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
10288
// Float negate. Like absFPR_reg, operands are pinned to ST(0), so a bare
// FCHS (D9 E0) flips the sign of the top of stack in place.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
10298
10299 // Cisc-alternate to addFPR_reg
10300 // Spill to obtain 24-bit precision
// 24-bit-rounding float add with one operand loaded straight from memory
// (cisc-alternate form: the LoadF is folded into the FPU op).
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),  // FLD_S [src2]
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );                      // FSTP_S rounds to 24 bits
  ins_pipe( fpu_mem_reg_mem );
%}
10314 //
10315 // Cisc-alternate to addFPR_reg
10316 // This instruction does not round to 24-bits
// Float add with memory operand, no 24-bit rounding: dst = dst + [src].
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),  // FLD_S [src]
              OpcP, RegOpc(dst) );                    // FADDP dst,ST
  ins_pipe( fpu_reg_mem );
%}
10327
10328 // // Following two instructions for _222_mpegaudio
10329 // Spill to obtain 24-bit precision
// 24-bit-rounding float add, memory-on-the-left form (mirror of
// addFPR24_reg_mem; commutativity lets src1 come from memory).
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),  // FLD_S [src1]
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );                      // FSTP_S rounds to 24 bits
  ins_pipe( fpu_mem_reg_mem );
%}
10341
10342 // Cisc-spill variant
10343 // Spill to obtain 24-bit precision
// 24-bit-rounding float add with BOTH operands in memory (cisc-spill form:
// register allocator may have spilled src1 to the stack).
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),   // FLD_S [src2]
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),          // FADD_S [src1]
              Pop_Mem_FPR(dst) );                       // FSTP_S rounds to 24 bits
  ins_pipe( fpu_mem_mem_mem );
%}
10356
10357 // Spill to obtain 24-bit precision
// 24-bit-rounding float add, both operands from memory (plain mem/mem match,
// same encoding as the cisc-spill variant above).
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),   // FLD_S [src2]
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),          // FADD_S [src1]
              Pop_Mem_FPR(dst) );                       // FSTP_S rounds to 24 bits
  ins_pipe( fpu_mem_mem_mem );
%}
10370
10371
10372 // Spill to obtain 24-bit precision
// 24-bit-rounding float add of a register and a constant; the constant is
// fetched from the constant table and the store to the stack slot rounds.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));  // single-precision store rounds to 24 bits
  %}
  ins_pipe(fpu_mem_reg_con);
%}
10386 //
10387 // This instruction does not round to 24-bits
// Float add of a register and a constant, no 24-bit rounding: result pops
// back to an FPU register at full precision.
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);      // register-to-register pop, no rounding
  %}
  ins_pipe(fpu_reg_reg_con);
%}
10401
10402 // Spill to obtain 24-bit precision
// Float multiply with rounding to 24-bit precision via a stack-slot store.
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );  // FSTP_S rounds to 24 bits
  ins_pipe( fpu_mem_reg_reg );
%}
10416 //
10417 // This instruction does not round to 24-bits
// Float multiply, register form (no 24-bit spill): result pops back to an
// FPU register via Pop_Reg_FPR.
// NOTE(review): the format string says "FSTP_S $dst" but the encoding is a
// register pop, not a single-precision store — cosmetic only; verify upstream.
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),  // multiply is commutative
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}
10431
10432
10433 // Spill to obtain 24-bit precision
10434 // Cisc-alternate to reg-reg multiply
// 24-bit-rounding float multiply with one operand loaded from memory
// (cisc-alternate to the reg-reg multiply).
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),  // FLD_S [src2]
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );                      // FSTP_S rounds to 24 bits
  ins_pipe( fpu_mem_reg_mem );
%}
10448 //
10449 // This instruction does not round to 24-bits
10450 // Cisc-alternate to reg-reg multiply
// Float multiply with memory operand, no 24-bit rounding (cisc-alternate).
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),  // FLD_S [src2]
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );                      // pop to register, no rounding
  ins_pipe( fpu_reg_reg_mem );
%}
10462
10463 // Spill to obtain 24-bit precision
// 24-bit-rounding float multiply, both operands from memory.
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),   // FLD_S [src2]
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),          // FMUL_S [src1]
              Pop_Mem_FPR(dst) );                       // FSTP_S rounds to 24 bits
  ins_pipe( fpu_mem_mem_mem );
%}
10476
10477 // Spill to obtain 24-bit precision
// 24-bit-rounding float multiply of a register and a constant-table constant.
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));  // single-precision store rounds to 24 bits
  %}
  ins_pipe(fpu_mem_reg_con);
%}
10492 //
10493 // This instruction does not round to 24-bits
// Float multiply of a register and a constant, no 24-bit rounding.
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);      // register-to-register pop, no rounding
  %}
  ins_pipe(fpu_reg_reg_con);
%}
10508
10509
10510 //
10511 // MACRO1 -- subsume unshared load into mulFPR
10512 // This instruction does not round to 24-bits
// MACRO1: multiply where the left operand is an unshared load folded into the
// FPU sequence; no 24-bit rounding.
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),  // FLD_S [mem1]
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
10526 //
10527 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10528 // This instruction does not round to 24-bits
// MACRO2: dst = [mem1]*src1 + src2, with the multiply's load subsumed into
// the FPU sequence; no 24-bit rounding. Low cost favors this fused form.
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),  // FLD_S [mem1]
              FMul_ST_reg(src1),           // ST *= src1
              FAdd_ST_reg(src2),           // ST += src2
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}
10545
10546 // MACRO3 -- addFPR a mulFPR
10547 // This instruction does not round to 24-bits. It is a '2-address'
10548 // instruction in that the result goes back to src2. This eliminates
10549 // a move from the macro; possibly the register allocator will have
10550 // to add it back (and maybe not).
// MACRO3 (float): src2 = src0*src1 + src2, destructive on src2 (2-address
// form saves a move); no 24-bit rounding.
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),     // ST *= src1
              FAddP_reg_ST(src2) );  // src2 += ST, pop
  ins_pipe( fpu_reg_reg_reg );
%}
10564
10565 // MACRO4 -- divFPR subFPR
10566 // This instruction does not round to 24-bits
// MACRO4: dst = (src2 - src1) / src3 fused on the x87 stack; no 24-bit
// rounding.
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),  // FSUB then FDIV on ST(0)
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}
10581
10582 // Spill to obtain 24-bit precision
// Float divide with rounding to 24-bit precision via a stack-slot store.
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );  // FSTP_S rounds to 24 bits
  ins_pipe( fpu_mem_reg_reg );
%}
10594 //
10595 // This instruction does not round to 24-bits
// Float divide, register form (no 24-bit spill): dst = dst / src.
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );  // FDIVP dst,ST then pop
  ins_pipe( fpu_reg_reg );
%}
10606
10607
10608 // Spill to obtain 24-bit precision
// Float remainder with 24-bit rounding: reuses the double FPREM helper
// (emitModDPR), then the single-precision store to the stack slot rounds.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),               // FPREM loop
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));          // FSTP_S rounds to 24 bits
  ins_pipe( pipe_slow );
%}
10621 //
10622 // This instruction does not round to 24-bits
// Float remainder, no 24-bit rounding: FPREM loop, result pops to a register.
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),              // FPREM loop
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}
10635
// Float remainder for the SSE path: spill XMM operands to the stack, run the
// x87 FPREM loop (XMM has no remainder), reload the result into XMM.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);  // FNSTSW AX / SAHF status polling in the loop
  format %{ "SUB ESP,4\t # FMOD\n"
          "\tMOVSS [ESP+0],$src1\n"
          "\tFLD_S [ESP+0]\n"
          "\tMOVSS [ESP+0],$src0\n"
          "\tFLD_S [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS $dst,[ESP+0]\n"
          "\tADD ESP,4\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}
10659
10660
10661 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all Alpha sorted. Please keep it that way!
10663
// RoundFloat: force an x87 value to single precision by storing it to a
// stack slot with FSTP_S (the store performs the round).
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
10672
// RoundDouble: force an x87 value to double precision by storing it to a
// stack slot with FSTP_D (x87 registers hold 80-bit extended precision).
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
10681
10682 // Force rounding to 24-bit precision and 6-bit exponent
// ConvD2F on the pure x87 path: expands to a rounding store, which performs
// the narrowing to single precision.
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}
10691
10692 // Force rounding to 24-bit precision and 6-bit exponent
// ConvD2F when UseSSE==1: source double is in x87, destination float in XMM.
// Round through a 4-byte stack temp (FSTP_S rounds), then MOVSS into XMM.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );  // SUB/ADD ESP clobber flags
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      // src is not ST(0): load it to the top, then pop-store.
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      // src already in ST(0): store without disturbing the FPU stack.
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}
10714
10715 // Force rounding double precision to single precision
// ConvD2F on the SSE2 path: a single CVTSD2SS does the narrowing round.
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
10725
// ConvF2D on the pure x87 path: widening is exact, so a register-to-register
// copy suffices (x87 registers hold extended precision).
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}
10733
// ConvF2D when UseSSE==1 and the result is consumed as a double stack slot:
// expands to a rounding double store.
instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}
10742
// ConvF2D when UseSSE==1: source float is in XMM, destination double in x87.
// Pass the value through a 4-byte stack temp (MOVSS out, FLD_S in).
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );  // SUB/ADD ESP clobber flags
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);  // pop into the destination FPU register
  %}
  ins_pipe( pipe_slow );
%}
10761
// ConvF2D on the SSE2 path: a single CVTSS2SD does the exact widening.
instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
10771
10772 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// ConvD2I on the x87 path: truncating FIST under a switched rounding mode.
// The hardware writes 0x80000000 on overflow/NaN, which triggers a call to
// d2i_wrapper to produce Java's required corner-case results.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}
10791
10792 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// ConvD2I on the SSE2 path: CVTTSD2SI truncates directly; it returns the
// sentinel 0x80000000 on overflow/NaN, in which case the value is replayed
// through the x87 stack into d2i_wrapper for Java corner-case semantics.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);   // sentinel: overflow or NaN
    __ jccb(Assembler::notEqual, fast);
    // Slow path: hand the original double to the wrapper in ST(0).
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}
10820
// ConvD2L on the x87 path: truncating 64-bit FIST; the result lands in
// EDX:EAX (eADXRegL). Sentinel 0x8000000000000000 routes to d2l_wrapper.
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
10842
10843 // XMM lacks a float/double->long conversion, so use the old FPU stack.
// ConvD2L on the SSE2 path. XMM has no double->long conversion on ia32, so
// the value is spilled and converted with a truncating x87 FISTP. The result
// is returned in EDX:EAX; the sentinel 0x8000000000000000 (overflow/NaN)
// routes to d2l_wrapper for Java corner-case semantics.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    // Switch to truncation (round-toward-zero) for the conversion.
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);   // high word of the sentinel?
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);         // low word must be zero too
    __ jccb(Assembler::notEqual, fast);
    // Slow path: replay the original double into ST(0) for the wrapper.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}
10895
10896 // Convert a double to an int. Java semantics require we do complex
10897 // manglations in the corner cases. So we set the rounding mode to
10898 // 'zero', store the darned double down as an int, and reset the
10899 // rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NaN; we check for this and
// go the slow path if needed.
// x87-only (UseSSE==0) float->int.  The result 0x80000000 marks an FPU
// invalid conversion (overflow/NaN) and dispatches to the d2i_wrapper stub.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}
10921
10922 // Convert a float in xmm to an int reg.
// SSE float->int via CVTTSS2SI.  The instruction returns 0x80000000 for
// overflow/NaN; in that case reload the float onto the x87 stack and call
// the d2i_wrapper stub to produce Java corner-case semantics.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    // 0x80000000 is the hardware's invalid-result marker; take the slow path.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Pass the argument to the wrapper on the x87 stack.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}
10950
// x87-only (UseSSE==0) float->long.  EDX:EAX == 0x80000000:00000000 marks an
// FPU invalid conversion (overflow/NaN) and dispatches to d2l_wrapper.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
10973
10974 // XMM lacks a float/double->long conversion, so use the old FPU stack.
// SSE float->long.  Like convD2L_reg_reg: no XMM float->64-bit conversion
// exists on 32-bit x86, so the float is spilled and converted on the x87
// stack in truncating mode; 0x80000000:00000000 routes to d2l_wrapper.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // 8 bytes reserved: the 64-bit FIST result is stored in the same slot.
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    // Force round-toward-zero for Java's truncating conversion.
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // FPU invalid-result marker => slow path through the wrapper stub.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Reload the original float onto the FPU stack for the wrapper call.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}
11026
// x87 int->double: FILD from a stack slot, FSTP into the FPU register.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11036
// SSE2 int->double via CVTSI2SD (used when the XMM i2d variant is disabled).
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11046
// SSE2 int->double with a folded memory load: CVTSI2SD directly from memory.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
11056
// XMM-only int->double (UseXmmI2D): move the int into an XMM register and
// convert with CVTDQ2PD, avoiding the GPR->XMM CVTSI2SD form.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD  $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11070
// x87 int->double with a folded memory load; excluded in 24-bit mode,
// where a rounding store would be required.
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11081
11082 // Convert a byte to a float; no rounding step needed.
// x87 int->float for a value known to be masked with 0xFF (AndI with
// constant 255): the byte range is exactly representable, so no rounding
// store is needed even in 24-bit mode.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11093
11094 // In 24-bit mode, force exponent rounding by storing back out
// In 24-bit mode, force exponent rounding by storing back out
// (FSTP_S to a stack slot rounds the result to single precision).
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}
11106
11107 // In 24-bit mode, force exponent rounding by storing back out
// In 24-bit mode, force exponent rounding by storing back out;
// same as convI2FPR_SSF but with the integer load folded from memory.
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}
11119
11120 // This instruction does not round to 24-bits
// This instruction does not round to 24-bits
// (register-destination x87 int->float for the non-24-bit case).
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11131
11132 // This instruction does not round to 24-bits
// This instruction does not round to 24-bits
// (memory-source variant of convI2FPR_reg, folding the integer load).
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11143
11144 // Convert an int to a float in xmm; no rounding step needed.
// SSE int->float via CVTSI2SS; every int is exactly representable as a
// float after rounding, so no extra rounding step is needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  // Parenthesized for clarity; && binds tighter than ||, so this is
  // identical to the original "UseSSE==1 || UseSSE>=2 && !UseXmmI2F":
  // plain SSE1, or SSE2+ when the XMM variant (convXI2F_reg) is disabled.
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11154
// XMM-only int->float (UseXmmI2F): move the int into an XMM register and
// convert with CVTDQ2PS, avoiding the GPR->XMM CVTSI2SS form.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD  $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11168
// Sign-extend int to long: copy into both halves, then arithmetic-shift the
// high half by 31 to replicate the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV    $dst.lo,$src\n\t"
            "MOV    $dst.hi,$src\n\t"
            "SAR    $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}
11179
11180 // Zero-extend convert int to long
// Zero-extend convert int to long
// Matches (ConvI2L src) AND-masked with 0xFFFFFFFF, so the high half is
// simply cleared instead of sign-extended.
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV    $dst.lo,$src\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}
11191
11192 // Zero-extend long
// Zero-extend long
// Keep the low 32 bits of a long (AndL with 0xFFFFFFFF): copy the low half
// and clear the high half with XOR.
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  // Note: no trailing "\n\t" on the last format line — the stray separator
  // produced a dangling blank continuation in PrintOptoAssembly output
  // (compare convI2L_reg_zex above).
  format %{ "MOV    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}
11203
// x87 long->double: push both halves, FILD the 64-bit integer from the
// stack, then store with a rounding FSTP_D into the destination stack slot.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
            "PUSH   $src.lo\n\t"
            "FILD   ST,[ESP + #0]\n\t"
            "ADD    ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}
11217
// SSE2 long->double: there is no XMM long->double instruction on 32-bit
// x86, so convert via the x87 FILD/FSTP_D and move the result into XMM.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
            "PUSH   $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD  $dst,[ESP]\n\t"
            "ADD    ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}
11232
// SSE long->float: convert via x87 (FILD then rounding FSTP_S) and move
// the single-precision result into the XMM destination.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
            "PUSH   $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS  $dst,[ESP]\n\t"
            "ADD    ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}
11247
// x87 long->float fallback (no predicate: matches when no other rule does):
// FILD the 64-bit integer, then a rounding FSTP_S into the stack slot.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
            "PUSH   $src.lo\n\t"
            "FILD   ST,[ESP + #0]\n\t"
            "ADD    ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
11260
// Narrow long to int: just copy the low 32 bits; the high half is dropped.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV    $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}
11268
// Reinterpret float bits as int: plain 32-bit load from the stack slot.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}
11279
// Reinterpret x87 float bits as int: store single-precision into the
// destination stack slot (the int is then loaded from there by its user).
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
11290
// Reinterpret XMM float bits as int: MOVSS the raw bits to the stack slot.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11303
// Reinterpret XMM float bits as int register-to-register via MOVD
// (cheapest variant: no memory round-trip, hence lowest cost).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11315
// Reinterpret int bits as float: plain 32-bit store into the float slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
11327
11328
// Reinterpret int bits as x87 float: FLD the raw 32 bits as m32real.
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S  $src\n\t"
            "FSTP   $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
11342
// Reinterpret int bits as XMM float: MOVSS the raw bits from the stack slot.
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}
11355
// Reinterpret int bits as XMM float register-to-register via MOVD
// (cheapest variant: no memory round-trip, hence lowest cost).
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11368
// Reinterpret double bits as long: two 32-bit loads from the stack slot
// into the low and high halves of the long register pair.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV    $dst.lo,$src\n\t"
            "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}
11380
// Reinterpret x87 double bits as long: store double-precision into the
// destination stack slot (the long is then read from there by its user).
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
11391
// Reinterpret XMM double bits as long: MOVSD the raw bits to the stack slot.
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11403
// Reinterpret XMM double bits as long without touching memory: MOVD the low
// 32 bits, then PSHUFLW (imm 0x4E swaps the 32-bit halves of the low
// quadword) so a second MOVD can extract the high 32 bits via $tmp.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD   $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11419
// Reinterpret long bits as double: two 32-bit stores into the double slot.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}
11431
11432
// Reinterpret long bits as x87 double: FLD the raw 64 bits as m64real.
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D  $src\n\t"
            "FSTP   $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
11446
11447
// Reinterpret long bits as XMM double via MOVSD, chosen when
// UseXmmLoadAndClearUpper is on (MOVSD clears the upper half).
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}
11460
// Same as MoveL2D_stack_reg_sse but for !UseXmmLoadAndClearUpper; the
// format names MOVLPD — presumably movdbl() emits MOVLPD (partial load,
// upper half untouched) under this flag setting; verify in macroAssembler.
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}
11473
// Reinterpret long bits as XMM double without touching memory: MOVD each
// 32-bit half into an XMM register and interleave them with PUNPCKLDQ.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD   $dst,$src.lo\n\t"
            "MOVD   $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11489
11490
11491 // =======================================================================
11492 // fast clearing of an array
11493 // Small ClearArray non-AVX512.
// Clear a small (non-large) array, UseAVX <= 2.  ECX holds the size in
// doublewords, EDI the base; EAX is zeroed and used as the store value.
// The actual code selection (REP STOSB / XMM loop / REP STOS) happens
// inside MacroAssembler::clear_mem; the format below mirrors its choices.
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
    $$emit$$"JG     LARGE\n\t"
    $$emit$$"SHL    ECX, 1\n\t"
    $$emit$$"DEC    ECX\n\t"
    $$emit$$"JS     DONE\t# Zero length\n\t"
    $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC    ECX\n\t"
    $$emit$$"JGE    LOOP\n\t"
    $$emit$$"JMP    DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV     RDI,RAX\n\t"
      $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ    L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD     0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB     0x8,RCX\n\t"
      $$emit$$"JGE     L_loop\n\t"
      $$emit$$"ADD     0x4,RCX\n\t"
      $$emit$$"JL      L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD     0x20,RAX\n\t"
      $$emit$$"SUB     0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD     0x4,RCX\n\t"
      $$emit$$"JLE     L_end\n\t"
      $$emit$$"DEC     RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
      $$emit$$"ADD     0x8,RAX\n\t"
      $$emit$$"DEC     RCX\n\t"
      $$emit$$"JGE     L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // 'false' => small-array variant; knoreg => no AVX-512 mask register.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11552
11553 // Small ClearArray AVX512 non-constant length.
// Small ClearArray, AVX512 (UseAVX > 2) with a non-constant length; same as
// rep_stos but supplies an opmask register ($ktmp) to clear_mem.
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
    $$emit$$"JG     LARGE\n\t"
    $$emit$$"SHL    ECX, 1\n\t"
    $$emit$$"DEC    ECX\n\t"
    $$emit$$"JS     DONE\t# Zero length\n\t"
    $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC    ECX\n\t"
    $$emit$$"JGE    LOOP\n\t"
    $$emit$$"JMP    DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV     RDI,RAX\n\t"
      $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ    L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD     0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB     0x8,RCX\n\t"
      $$emit$$"JGE     L_loop\n\t"
      $$emit$$"ADD     0x4,RCX\n\t"
      $$emit$$"JL      L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD     0x20,RAX\n\t"
      $$emit$$"SUB     0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD     0x4,RCX\n\t"
      $$emit$$"JLE     L_end\n\t"
      $$emit$$"DEC     RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
      $$emit$$"ADD     0x8,RAX\n\t"
      $$emit$$"DEC     RCX\n\t"
      $$emit$$"JGE     L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // 'false' => small-array variant; $ktmp is the AVX-512 mask register.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11613
11614 // Large ClearArray non-AVX512.
// Large ClearArray, UseAVX <= 2: no small-length fast path; go straight to
// the bulk-clearing code selected inside MacroAssembler::clear_mem.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ    L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD     0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB     0x8,RCX\n\t"
      $$emit$$"JGE     L_loop\n\t"
      $$emit$$"ADD     0x4,RCX\n\t"
      $$emit$$"JL      L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD     0x20,RAX\n\t"
      $$emit$$"SUB     0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD     0x4,RCX\n\t"
      $$emit$$"JLE     L_end\n\t"
      $$emit$$"DEC     RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
      $$emit$$"ADD     0x8,RAX\n\t"
      $$emit$$"DEC     RCX\n\t"
      $$emit$$"JGE     L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // 'true' => large-array variant; knoreg => no AVX-512 mask register.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11663
11664 // Large ClearArray AVX512.
// Large ClearArray, AVX512 (UseAVX > 2): like rep_stos_large but supplies
// an opmask register ($ktmp) to clear_mem.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ    L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD     0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB     0x8,RCX\n\t"
      $$emit$$"JGE     L_loop\n\t"
      $$emit$$"ADD     0x4,RCX\n\t"
      $$emit$$"JL      L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD     0x20,RAX\n\t"
      $$emit$$"SUB     0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD     0x4,RCX\n\t"
      $$emit$$"JLE     L_end\n\t"
      $$emit$$"DEC     RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
      $$emit$$"ADD     0x8,RAX\n\t"
      $$emit$$"DEC     RCX\n\t"
      $$emit$$"JGE     L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // 'true' => large-array variant; $ktmp is the AVX-512 mask register.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11713
11714 // Small ClearArray AVX512 constant length.
// Small ClearArray, AVX512 with a compile-time-constant length: uses the
// clear_mem overload taking an immediate count ($cnt$$constant) and an
// opmask register; requires AVX512VL+BW for the masked stores.
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
            ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
11728
// Compare two Latin-1 (byte[]) strings; result in EAX.  Non-AVX512 variant
// (no opmask register available), so clear_mem-style knoreg is passed.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11743
// Latin-1 (byte[]) string compare, AVX512VL+BW variant: supplies an opmask
// register ($ktmp) so string_compare can use masked vector loads.
instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11758
// Compare two UTF-16 (char[]) strings; result in EAX.  Non-AVX512 variant.
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11773
// UTF-16 (char[]) string compare, AVX512VL+BW variant with opmask $ktmp.
instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11788
// Mixed-encoding compare: str1 is Latin-1, str2 is UTF-16 (LU).
// Non-AVX512 variant.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11803
// Compare a Latin-1 (byte[]) string against a UTF-16 (char[]) string
// (LU encoding). EVEX path with an opmask temporary (ktmp).
instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11818
// Compare a UTF-16 (char[]) string against a Latin-1 (byte[]) string
// (UL encoding). Note the operand registers differ from the other variants
// (str1 in ESI, str2 in EDI) and the encoding passes str2/cnt2 FIRST —
// the arguments are deliberately swapped when calling the stub.
// Non-EVEX path.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // Swapped: stub receives (str2, str1) and (cnt2, cnt1).
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11833
// Compare a UTF-16 (char[]) string against a Latin-1 (byte[]) string
// (UL encoding). EVEX path with opmask temporary; like string_compareUL,
// the operands are passed swapped to the stub.
instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // Swapped: stub receives (str2, str1) and (cnt2, cnt1).
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11848
11849 // fast string equals
// String equality: compares cnt elements of str1 and str2 into result.
// Implemented via the shared arrays_equals stub; first argument `false`
// selects the string (StrEquals) flavor rather than the array (AryEq)
// flavor — presumably this controls header/length handling, confirm in
// MacroAssembler::arrays_equals. Non-EVEX path (`knoreg`).
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe( pipe_slow );
%}
11865
// String equality, EVEX path: same as string_equals but with an opmask
// temporary (ktmp) when AVX-512 VL+BW is available.
instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}
11881
11882
11883 // fast search of substring with known size.
// IndexOf with a compile-time-constant needle length, Latin-1/Latin-1 (LL).
// Two code paths: needles of >= 16 byte elements (a full XMM vector) use
// string_indexofC8 and never go through the stack; shorter needles use the
// general string_indexof, which may load them via the stack near page
// boundaries.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}
11910
11911 // fast search of substring with known size.
// IndexOf with a compile-time-constant needle length, UTF-16/UTF-16 (UU).
// Threshold is 8 char elements (= 16 bytes, one XMM vector); larger constant
// needles take the no-stack string_indexofC8 path.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}
11938
11939 // fast search of substring with known size.
// IndexOf with a compile-time-constant needle length, mixed UTF-16 haystack /
// Latin-1 needle (UL). Same 8-element threshold as the UU variant.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}
11966
// IndexOf with a runtime (non-constant) needle length, Latin-1/Latin-1 (LL).
// The literal -1 tells the stub the needle length is not a compile-time
// constant and must be taken from cnt2.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
11982
// IndexOf with a runtime needle length, UTF-16/UTF-16 (UU); -1 marks the
// needle length as non-constant (taken from cnt2).
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}
11998
// IndexOf with a runtime needle length, UTF-16 haystack / Latin-1 needle (UL);
// -1 marks the needle length as non-constant (taken from cnt2).
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}
12014
// IndexOf of a single char in a UTF-16 string (encoding U).
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                             eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12027
// IndexOf of a single char in a Latin-1 string (encoding L); dispatches to
// the byte-wise stringL_indexof_char stub.
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12040
12041
12042 // fast array equals
// byte[] array equality (AryEq, LL). First bool `true` selects the
// array flavor of arrays_equals (lengths read from the array headers);
// last bool `false` means byte-sized elements. Non-EVEX path.
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12059
// byte[] array equality, EVEX path (AVX-512 VL+BW): adds an opmask
// temporary (ktmp) for masked tail handling.
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12076
// char[] array equality (AryEq, UU); last bool `true` means char-sized
// (16-bit) elements. Non-EVEX path.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12093
// char[] array equality, EVEX path: opmask temporary (ktmp) supplied.
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12110
// Test whether a byte[] contains any negative bytes (i.e. any byte >= 0x80).
// Non-EVEX path: selected when either AVX-512 VL+BW or BMI2 is missing;
// both opmask slots are passed as knoreg.
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12126
// HasNegatives, EVEX path: requires BOTH AVX-512 VL+BW and BMI2; uses two
// opmask temporaries (ktmp1, ktmp2).
instruct has_negatives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12142
12143
12144 // fast char[] to byte[] compression
// Compress char[] (UTF-16) to byte[] (Latin-1) — StrCompressedCopy.
// Non-EVEX path (no AVX-512 VL+BW or no BMI2); both opmask slots are knoreg.
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                         regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12160
// StrCompressedCopy, EVEX path: requires AVX-512 VL+BW and BMI2; supplies
// two opmask temporaries to the stub.
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                              regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12176
12177 // fast byte[] to char[] inflation
// Inflate byte[] (Latin-1) to char[] (UTF-16) — StrInflatedCopy.
// Produces no value (Universe dummy). Non-EVEX path.
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12191
// StrInflatedCopy, EVEX path: requires AVX-512 VL+BW and BMI2; one opmask
// temporary (ktmp).
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                             regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12205
12206 // encode char[] to byte[] in ISO_8859_1
// Encode char[] to byte[] in ISO-8859-1 (EncodeISOArray with is_ascii()
// false); trailing `false` argument selects the ISO (not ASCII) variant
// of the stub.
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}
12222
12223 // encode char[] to byte[] in ASCII
// Encode char[] to byte[] in ASCII (EncodeISOArray with is_ascii() true);
// trailing `true` argument selects the ASCII variant of the stub.
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                            regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                            eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}
12239
12240 //----------Control Flow Instructions------------------------------------------
12241 // Signed compare Instructions
// Signed 32-bit register-register compare; sets flags only (CMP r32, r/m32).
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}
12250
// Signed 32-bit register-immediate compare; OpcSErm/Con8or32 picks the
// sign-extended 8-bit immediate form when the constant fits.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12260
12261 // Cisc-spilled version of cmpI_eReg
// Cisc-spilled version of cmpI_eReg: compare register against memory operand.
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}
12271
// Compare against zero via TEST r,r (shorter than CMP r,0; sets same flags
// for eq/ne/lt/ge purposes).
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12281
// (src & con) compared against zero folded into a single TEST r/m32, imm32.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);  /* TEST r/m32, imm32 is F7 /0 */
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}
12290
// (src & mem) compared against zero folded into TEST r32, r/m32.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}
12299
12300 // Unsigned compare Instructions; really, same as signed except they
12301 // produce an eFlagsRegU instead of eFlagsReg.
// Unsigned 32-bit compare: identical encoding to the signed CMP, but the
// result is an eFlagsRegU so only unsigned condition codes are used.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}
12310
// Unsigned 32-bit register-immediate compare.
instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12319
12320 // // Cisc-spilled version of cmpU_eReg
// Cisc-spilled version of cmpU_eReg: unsigned compare against memory.
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}
12330
12331 // // Cisc-spilled version of cmpU_eReg
12332 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12333 // match(Set cr (CmpU (LoadI op1) op2));
12334 //
12335 // format %{ "CMPu $op1,$op2" %}
12336 // ins_cost(500);
12337 // opcode(0x39); /* Opcode 39 /r */
12338 // ins_encode( OpcP, RegMem( op1, op2) );
12339 //%}
12340
// Unsigned compare against zero via TEST r,r.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12349
12350 // Unsigned pointer compare Instructions
// Pointer compare (always unsigned on x86): register-register.
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}
12359
// Pointer compare against an immediate pointer constant.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12368
12369 // // Cisc-spilled version of cmpP_eReg
// Cisc-spilled version of cmpP_eReg: pointer compare against memory.
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}
12379
12380 // // Cisc-spilled version of cmpP_eReg
12381 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12382 // match(Set cr (CmpP (LoadP op1) op2));
12383 //
12384 // format %{ "CMPu $op1,$op2" %}
12385 // ins_cost(500);
12386 // opcode(0x39); /* Opcode 39 /r */
12387 // ins_encode( OpcP, RegMem( op1, op2) );
12388 //%}
12389
12390 // Compare raw pointer (used in out-of-heap check).
12391 // Only works because non-oop pointers must be raw pointers
12392 // and raw pointers have no anti-dependencies.
// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
// The predicate restricts this to loads of non-relocatable (raw) pointers.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}
12402
12403 //
12404 // This will generate a signed flags result. This should be ok
12405 // since any compare to a zero should be eq/neq.
// Pointer null check via TEST r,r. Generates a signed flags result, which
// is fine since a compare against null is only ever used as eq/ne.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12414
12415 // Cisc-spilled version of testP_reg
12416 // This will generate a signed flags result. This should be ok
12417 // since any compare to a zero should be eq/neq.
// Cisc-spilled version of testP_reg: null check of a pointer loaded from
// memory, encoded as TEST m32, 0xFFFFFFFF (F7 /0). Signed flags result is
// fine since a compare against null is only used as eq/ne.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}
12427
12428 // Yanked all unsigned pointer compare operations.
12429 // Pointer compares are done with CmpP which is already unsigned.
12430
12431 //----------Max and Min--------------------------------------------------------
12432 // Min Instructions
12433 ////
12434 // *** Min and Max using the conditional move are slower than the
12435 // *** branch version on a Pentium III.
12436 // // Conditional move for min
12437 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12438 // effect( USE_DEF op2, USE op1, USE cr );
12439 // format %{ "CMOVlt $op2,$op1\t! min" %}
12440 // opcode(0x4C,0x0F);
12441 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12442 // ins_pipe( pipe_cmov_reg );
12443 //%}
12444 //
12445 //// Min Register with Register (P6 version)
12446 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12447 // predicate(VM_Version::supports_cmov() );
12448 // match(Set op2 (MinI op1 op2));
12449 // ins_cost(200);
12450 // expand %{
12451 // eFlagsReg cr;
12452 // compI_eReg(cr,op1,op2);
12453 // cmovI_reg_lt(op2,op1,cr);
12454 // %}
12455 //%}
12456
12457 // Min Register with Register (generic version)
// Min Register with Register (generic version): dst = min(dst, src).
// Opcode 0xCC is a placeholder — the real code comes from the min_enc
// encoding class. Clobbers flags.
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}
12468
12469 // Max Register with Register
12470 // *** Min and Max using the conditional move are slower than the
12471 // *** branch version on a Pentium III.
12472 // // Conditional move for max
12473 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12474 // effect( USE_DEF op2, USE op1, USE cr );
12475 // format %{ "CMOVgt $op2,$op1\t! max" %}
12476 // opcode(0x4F,0x0F);
12477 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12478 // ins_pipe( pipe_cmov_reg );
12479 //%}
12480 //
12481 // // Max Register with Register (P6 version)
12482 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12483 // predicate(VM_Version::supports_cmov() );
12484 // match(Set op2 (MaxI op1 op2));
12485 // ins_cost(200);
12486 // expand %{
12487 // eFlagsReg cr;
12488 // compI_eReg(cr,op1,op2);
12489 // cmovI_reg_gt(op2,op1,cr);
12490 // %}
12491 //%}
12492
12493 // Max Register with Register (generic version)
// Max Register with Register (generic version): dst = max(dst, src).
// Opcode 0xCC is a placeholder — the real code comes from the max_enc
// encoding class. Clobbers flags.
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}
12504
12505 // ============================================================================
12506 // Counted Loop limit node which represents exact final iterator value.
12507 // Note: the resulting value should fit into integer range since
12508 // counted loops have limit check on overflow.
// Counted-loop exact limit: limit = init + stride * ceil((limit - init) / stride),
// computed in 64-bit precision (EAX:EDX pairs) so the intermediate
// difference cannot overflow 32 bits. Requires limit in EAX and limit_hi
// in EDX because CDQ and IDIV/MUL implicitly use EDX:EAX.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    // Strides of +/-1 are matched elsewhere; the division below assumes |strd| > 1.
    assert(strd != 1 && strd != -1, "sanity");
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)  -- rounds the quotient up (ceiling division)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      // Negative stride: add (strd + 1), then negate so we can divide by |strd|.
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12551
12552 // ============================================================================
12553 // Branch Instructions
12554 // Jump Table
// Jump Table
// Indirect jump through a table of code addresses stored in the constant
// section; switch_val is a pre-scaled byte offset into that table.
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}
12566
12567 // Jump Direct - Label defines a relative address from JMP+1
// Jump Direct - Label defines a relative address from JMP+1
// Long-form (5-byte: opcode + rel32) unconditional jump. The 2-byte
// short form is jmpDir_short, substituted when the offset fits in rel8.
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}
12581
12582 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Long-form (6-byte: 0x0F opcode + rel32) conditional jump on signed
// condition codes; short form is jmpCon_short.
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}
12596
12597 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Counted-loop back branch (signed flags). The predicate excludes loops
// with a vector mask set; those use the *_and_restoreMask variants below.
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}
12612
12613 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Counted-loop back branch using unsigned condition codes.
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}
12628
// Counted-loop back branch on flags produced by an unordered float/double
// compare (UCF). Cheaper (ins_cost 200) so it is preferred over
// jmpLoopEndU when the UCF flags class applies.
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}
12643
// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Bounded mask operand used in the following pattern is needed for
// post-loop multiversioning.
// Counted-loop back branch that also restores the AVX-512 opmask (K1)
// after a post-loop-multiversioned loop; selected only when the loop node
// has a vector mask set. ktmp is a scratch opmask register (TEMP).
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, eFlagsReg cr, label labl) %{
  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl, TEMP ktmp);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask($ktmp$$KRegister);
  %}
  ins_pipe( pipe_jcc );
%}
12664
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Bounded mask operand used in the following pattern is needed for
// post-loop multiversioning.
// Unsigned-flags counterpart of jmpLoopEnd_and_restoreMask: loop back
// branch plus AVX-512 opmask restore for post-loop multiversioning.
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, eFlagsRegU cmp, label labl) %{
  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl, TEMP ktmp);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask($ktmp$$KRegister);
  %}
  ins_pipe( pipe_jcc );
%}
12684
// Bounded mask operand used in the following pattern is needed for
// post-loop multiversioning.
// UCF-flags (unordered float compare) counterpart: loop back branch plus
// AVX-512 opmask restore for post-loop multiversioning.
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, eFlagsRegUCF cmp, label labl) %{
  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl, TEMP ktmp);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask($ktmp$$KRegister);
  %}
  ins_pipe( pipe_jcc );
%}
12703
12704 // Jump Direct Conditional - using unsigned comparison
// Jump Direct Conditional - using unsigned comparison
// Long-form conditional branch on unsigned flags; short form is
// jmpConU_short.
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
12718
// Conditional branch on flags from an unordered float/double compare
// where the condition needs no parity fix-up (cmpOpUCF). Cheaper cost so
// it is preferred over jmpConU when applicable.
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
12732
// Conditional branch on EQ/NE after an unordered float/double compare.
// An unordered (NaN) compare sets the parity flag, so:
//  - for NE: unordered counts as "not equal" -> branch to the target on
//    parity as well as on NE;
//  - for EQ: unordered must NOT be treated as equal -> skip over the JE
//    when parity is set.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // NaN (parity set) means "not equal": take the branch.
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // NaN must not compare equal: skip the JE when parity is set.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
12764
12765 // ============================================================================
12766 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
12767 // array for an instance of the superklass. Set a hidden internal cache on a
12768 // hit (cache is checked with exposed code in gen_subtype_check()). Return
12769 // NZ for a miss or zero for a hit. The encoding ALSO sets flags.
// Slow-path subtype check: scan sub's secondary-supers array for super.
// Returns zero in $result (EDI) on a hit (and updates the secondary-super
// cache) or non-zero on a miss; flags are set to match. Registers are
// fixed (ESI/EAX/ECX/EDI) because the encoding uses REPNE SCASD.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100); // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  // Opcode 0x1 tells enc_PartialSubtypeCheck to emit the XOR that zeroes
  // EDI on a hit (this variant produces a register result).
  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}
12788
// Flags-only variant of partialSubtypeCheck, matched when the result is
// immediately compared against zero: Z = hit, NZ = miss. No register
// result is produced, so the EDI-zeroing XOR is suppressed (opcode 0x0)
// and this form is slightly cheaper.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}
12806
12807 // ============================================================================
12808 // Branch Instructions -- short offset versions
12809 //
12810 // These instructions are used to replace jumps of a long offset (the default
12811 // match) with jumps of a shorter offset. These instructions are all tagged
12812 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12813 // match rules in general matching. Instead, the ADLC generates a conversion
12814 // method in the MachNode which can be used to do in-place replacement of the
12815 // long variant with the shorter variant. The compiler will determine if a
12816 // branch can be taken by the is_short_branch_offset() predicate in the machine
12817 // specific code section of the file.
12818
12819 // Jump Direct - Label defines a relative address from JMP+1
// Jump Direct - Label defines a relative address from JMP+1
// 2-byte (opcode + rel8) short form of jmpDir; substituted in place of
// the long form when the branch offset fits in 8 bits (ins_short_branch).
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}
12834
12835 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// 2-byte short form of jmpCon (signed flags).
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}
12850
12851 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// 2-byte short form of jmpLoopEnd (counted-loop back branch, signed flags).
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}
12866
12867 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// 2-byte short form of jmpLoopEndU (unsigned flags).
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}
12882
// 2-byte short form of jmpLoopEndUCF (flags from unordered float compare).
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}
12897
12898 // Jump Direct Conditional - using unsigned comparison
// Jump Direct Conditional - using unsigned comparison
// 2-byte short form of jmpConU.
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}
12913
// 2-byte short form of jmpConUCF (unordered-compare flags, no parity
// fix-up needed).
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}
12928
// Short form of jmpConUCF2: EQ/NE branch after an unordered float compare
// with parity (NaN) fix-up, using two 2-byte branches (size 4).
// See jmpConUCF2 for the parity-handling rationale.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // NaN (parity set) means "not equal": take the branch.
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // NaN must not compare equal: skip the JE when parity is set.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12962
12963 // ============================================================================
12964 // Long Compare
12965 //
12966 // Currently we hold longs in 2 registers. Comparing such values efficiently
12967 // is tricky. The flavor of compare used depends on whether we are testing
12968 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
12969 // The GE test is the negated LT test. The LE test can be had by commuting
12970 // the operands (yielding a GE test) and then negating; negate again for the
12971 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the
12972 // NE test is negated from that.
12973
12974 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12975 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
12976 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections
12977 // are collapsed internally in the ADLC's dfa-gen code. The match for
12978 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12979 // foo match ends up with the wrong leaf. One fix is to not match both
12980 // reg-reg and reg-zero forms of long-compare. This is unfortunate because
12981 // both forms beat the trinary form of long-compare and both are very useful
12982 // on Intel which has so few registers.
12983
12984 // Manifest a CmpL result in an integer register. Very painful.
12985 // This is the test to avoid.
// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Produces dst = -1/0/+1 for src1 </==/> src2 (signed 64-bit compare of
// two register pairs): signed compare on the high words decides first;
// only on a high-word tie does the unsigned compare of the low words run.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // High words: signed compare.
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // High words equal: low words compared unsigned.
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}
13019
13020 //======
13021 // Manifest a CmpL result in the normal flags. Only good for LT or GE
13022 // compares. Can be used for LE or GT compares by reversing arguments.
13023 // NOT GOOD FOR EQ/NE tests.
// Long-vs-zero compare for LT/GE only: the sign of a long is entirely in
// its high word, so TEST hi,hi suffices. Produces the special
// flagsReg_long_LTGE class so only lt/ge users can consume the result.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13032
13033 // Manifest a CmpL result in the normal flags. Only good for LT or GE
13034 // compares. Can be used for LE or GT compares by reversing arguments.
13035 // NOT GOOD FOR EQ/NE tests.
// Long-vs-long compare for LT/GE only: a 64-bit subtract via CMP lo +
// SBB hi leaves the sign/overflow flags correct for lt/ge tests. tmp
// holds the high-word copy so src1 is not clobbered.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13046
// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
// Branch on a long-compare flags result; the predicate restricts this
// rule to lt/ge tests, the only ones the LTGE flags class supports.
// Expands to a plain conditional branch.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}
13057
13058 //======
13059 // Manifest a CmpUL result in the normal flags. Only good for LT or GE
13060 // compares. Can be used for LE or GT compares by reversing arguments.
13061 // NOT GOOD FOR EQ/NE tests.
// Unsigned-long-vs-zero compare for LT/GE only; same TEST hi,hi trick as
// the signed variant, producing the unsigned LTGE flags class.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}
13070
13071 // Manifest a CmpUL result in the normal flags. Only good for LT or GE
13072 // compares. Can be used for LE or GT compares by reversing arguments.
13073 // NOT GOOD FOR EQ/NE tests.
// Unsigned long-vs-long compare for LT/GE only: CMP lo + SBB hi performs
// the 64-bit subtract whose carry/sign flags drive unsigned lt/ge tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
13084
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
// Branch on an unsigned long-compare flags result; restricted by the
// predicate to lt/ge tests. Expands to a plain conditional branch.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl); // JLT or JGE...
  %}
%}
13095
13096 // Compare 2 longs and CMOVE longs.
// Compare 2 longs and CMOVE longs.
// Conditional move of a long (two CMOVs, low then high half) guarded by
// long-compare flags; limited to lt/ge tests by the predicate, and to
// CPUs with CMOV support.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13107
// Memory-source variant of cmovLL_reg_LTGE: conditionally loads both
// halves of a long from memory. lt/ge tests only; requires CMOV.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
13118
13119 // Compare 2 longs and CMOVE ints.
// Compare 2 longs and CMOVE ints.
// Single CMOV of an int guarded by long-compare flags; lt/ge only.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13129
// Memory-source variant of cmovII_reg_LTGE: conditional int load guarded
// by long-compare flags; lt/ge only.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
13139
13140 // Compare 2 longs and CMOVE ints.
// Compare 2 longs and CMOVE pointers.
// Single CMOV of a pointer guarded by long-compare flags; lt/ge only.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13150
13151 // Compare 2 longs and CMOVE doubles
// Compare 2 longs and CMOVE doubles (x87 FPU registers).
// FCMOV of a double guarded by long-compare flags; lt/ge tests only,
// and only on the x87 path (UseSSE <= 1).
// Fix: && binds tighter than ||, so the original predicate applied the
// UseSSE<=1 guard only to the lt arm, letting the ge arm match at any
// UseSSE level. Parenthesize so UseSSE<=1 guards both tests.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13160
13161 // Compare 2 longs and CMOVE doubles
// Compare 2 longs and CMOVE doubles (XMM registers).
// FCMOV-style move of a double guarded by long-compare flags; lt/ge
// tests only, and only when SSE2 is available (regD requires it).
// Fix: && binds tighter than ||, so the original predicate applied the
// UseSSE>=2 guard only to the lt arm; the ge arm could match without
// SSE2. Parenthesize so UseSSE>=2 guards both tests.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13170
// Compare 2 longs and CMOVE floats (x87 FPU registers).
// lt/ge tests only, x87-only path (UseSSE == 0).
// Fix: parenthesized the || arms — the original precedence applied the
// UseSSE==0 guard only to the lt arm, letting the ge arm match even
// when SSE float code (cmovFF_reg_LTGE) should be used instead.
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13179
// Compare 2 longs and CMOVE floats (XMM registers).
// lt/ge tests only; requires SSE (regF needs UseSSE >= 1).
// Fix: parenthesized the || arms — the original precedence applied the
// UseSSE>=1 guard only to the lt arm; the ge arm could match without SSE.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13188
13189 //======
13190 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// A long is zero iff (lo | hi) == 0, so OR the two halves into a temp and
// test the resulting Z flag. Produces the EQNE flags class so only eq/ne
// users consume it.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}
13200
13201 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Compares the low halves first; only if they are equal does the high-half
// compare run (short forward branch skips it otherwise), leaving Z set iff
// both halves match.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13212
13213 // Long compare reg == zero/reg OR reg != zero/reg
13214 // Just a wrapper for a normal branch, plus the predicate test.
// Branch on a long-compare flags result; the predicate restricts this
// rule to eq/ne tests, the only ones the EQNE flags class supports.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}
13223
13224 //======
13225 // Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Same OR-the-halves trick as the signed variant: equality against zero is
// sign-agnostic.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}
13235
13236 // Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Equality is sign-agnostic, so this uses the same low-then-high compare
// sequence as the signed EQNE variant.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}
13247
13248 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13249 // Just a wrapper for a normal branch, plus the predicate test.
// Branch on an unsigned long-compare flags result; restricted by the
// predicate to eq/ne tests.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl); // JEQ or JNE...
  %}
%}
13258
13259 // Compare 2 longs and CMOVE longs.
// Compare 2 longs and CMOVE longs.
// EQNE counterpart of cmovLL_reg_LTGE: two CMOVs (low, high) guarded by
// long-compare flags; eq/ne tests only, requires CMOV.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13270
// Memory-source variant of cmovLL_reg_EQNE: conditionally loads both
// halves of a long from memory. eq/ne tests only; requires CMOV.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
13281
13282 // Compare 2 longs and CMOVE ints.
// Compare 2 longs and CMOVE ints.
// Single CMOV of an int guarded by long-compare flags; eq/ne only.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13292
// Memory-source variant of cmovII_reg_EQNE: conditional int load guarded
// by long-compare flags; eq/ne only.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
13302
13303 // Compare 2 longs and CMOVE ints.
// Compare 2 longs and CMOVE pointers.
// Single CMOV of a pointer guarded by long-compare flags; eq/ne only.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13313
13314 // Compare 2 longs and CMOVE doubles
// Compare 2 longs and CMOVE doubles (x87 FPU registers).
// eq/ne tests only, x87-only path (UseSSE <= 1).
// Fix: && binds tighter than ||, so the original predicate applied the
// UseSSE<=1 guard only to the eq arm, letting the ne arm match at any
// UseSSE level. Parenthesize so UseSSE<=1 guards both tests.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13323
13324 // Compare 2 longs and CMOVE doubles
// Compare 2 longs and CMOVE doubles (XMM register form, UseSSE>=2).
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  // FIX: parenthesize the eq/ne disjunction; the original parsed as
  // (UseSSE>=2 && eq) || ne due to operator precedence, matching ne
  // regardless of UseSSE. Consistent with cmovII_reg_EQNE.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13333
// Compare 2 longs and CMOVE floats (x87 register form, UseSSE==0).
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  // FIX: parenthesize the eq/ne disjunction; the original parsed as
  // (UseSSE==0 && eq) || ne due to operator precedence.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13342
// Compare 2 longs and CMOVE floats (XMM register form, UseSSE>=1).
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  // FIX: parenthesize the eq/ne disjunction; the original parsed as
  // (UseSSE>=1 && eq) || ne due to operator precedence.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13351
13352 //======
13353 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13354 // Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  // tmp is clobbered: it holds 0, then accumulates the borrow of 0 - src.
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP    $tmp,$src.lo\n\t"
            "SBB    $tmp,$src.hi\n\t" %}
  // XOR/CMP/SBB computes the flags of (0 - src); the consumer must use the
  // commuted (LE/GT) test to get the original compare's result.
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}
13365
13366 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13367 // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
13368 // requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  // tmp is clobbered: it carries src2.hi through the SBB that folds in the borrow.
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV    $tmp,$src2.hi\n\t"
            "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
  // Note src2/src1 order: flags describe (src2 - src1), so the user branch
  // must use the commuted condition (handled by cmpOp_commute consumers).
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13379
// Long compares reg < zero/reg OR reg >= zero/reg.
13381 // Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // LEGT flags are only valid for gt/le user tests; _kids[0]->_leaf is the
  // Bool node directly under the If.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  // Just a conditional branch on the already-manifested flags.
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}
13391
13392 //======
13393 // Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
13394 // Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  // tmp is clobbered: zeroed, then accumulates the borrow of 0 - src.
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP    $tmp,$src.lo\n\t"
            "SBB    $tmp,$src.hi\n\t" %}
  // Same XOR/CMP/SBB sequence as the signed variant; only the consuming
  // test (unsigned) differs.
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}
13405
13406 // Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
13407 // Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands
13408 // requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  // tmp is clobbered by the MOV/SBB pair below.
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV    $tmp,$src2.hi\n\t"
            "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  // Operands intentionally swapped (src2, src1): flags describe src2 - src1,
  // requiring the commuted test in the consumer.
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
13419
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13421 // Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Unsigned LEGT flags are only valid for gt/le user tests.
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  // Wrapper around a plain conditional jump on the manifested flags.
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}
13431
13432 // Compare 2 longs and CMOVE longs.
// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  // Requires hardware CMOV; LEGT flags are only valid for le/gt tests.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  // A 64-bit move needs two 32-bit CMOVs, one per half.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13443
// Same as cmovLL_reg_LEGT, but the false-path long is loaded from memory.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  // RegMem_Hi addresses the upper 32 bits at src+4.
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
13454
// Unsigned-flags twin of cmovLL_reg_LEGT.
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13465
// Unsigned-flags twin of cmovLL_mem_LEGT.
instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
13476
13477 // Compare 2 longs and CMOVE ints.
// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  // Requires hardware CMOV; LEGT flags are only valid for le/gt tests.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13487
// Same as cmovII_reg_LEGT but the false-path value is loaded from memory.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
13497
13498 // Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  // Pointer CMOVE on LEGT long-compare flags; le/gt tests only.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13508
13509 // Compare 2 longs and CMOVE doubles
// Compare 2 longs and CMOVE doubles (x87 register form, UseSSE<=1).
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  // FIX: parenthesize the le/gt disjunction. '&&' binds tighter than '||',
  // so the original predicate parsed as (UseSSE<=1 && le) || gt and matched
  // BoolTest::gt regardless of UseSSE. Now matches cmovII_reg_LEGT's structure.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13518
13519 // Compare 2 longs and CMOVE doubles
// Compare 2 longs and CMOVE doubles (XMM register form, UseSSE>=2).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  // FIX: parenthesize the le/gt disjunction; the original parsed as
  // (UseSSE>=2 && le) || gt due to operator precedence.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13528
// Compare 2 longs and CMOVE floats (x87 register form, UseSSE==0).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  // FIX: parenthesize the le/gt disjunction; the original parsed as
  // (UseSSE==0 && le) || gt due to operator precedence.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13537
13538
// Compare 2 longs and CMOVE floats (XMM register form, UseSSE>=1).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  // FIX: parenthesize the le/gt disjunction; the original parsed as
  // (UseSSE>=1 && le) || gt due to operator precedence.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13547
13548
13549 // ============================================================================
13550 // Procedure Call/Return Instructions
13551 // Call Java Static Instruction
13552 // Note: If this code changes, the corresponding ret_addr_offset() and
13553 // compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8);  /* E8 cd */
  // pre_call_resets/post_call_FPU bracket the call with FPU/SSE state fixups;
  // call_epilog handles the return-value bookkeeping.
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  // Alignment matters: ret_addr_offset()/compute_padding() depend on this layout.
  ins_alignment(4);
%}
13568
13569 // Call Java Dynamic Instruction
13570 // Note: If this code changes, the corresponding ret_addr_offset() and
13571 // compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // EAX is pre-loaded with a placeholder oop for the inline-cache check.
  format %{ "MOV    EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8);  /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  // Alignment matters: ret_addr_offset()/compute_padding() depend on this layout.
  ins_alignment(4);
%}
13587
13588 // Call Runtime Instruction
// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8);  /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}
13603
13604 // Call runtime without safepoint
// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8);  /* E8 cd */
  // Leaf calls skip the safepoint machinery; Verify_FPU_For_Leaf checks the
  // x87 stack discipline around the call in debug builds.
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}
13618
// Leaf call that is known not to touch floating point, so no FPU
// stack clearing or post-call FPU fixup is emitted.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8);  /* E8 cd */
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}
13629
13630
13631 // Return Instruction
13632 // Remove the return address & jump to it.
// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);    // single-byte near return
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}
13640
13641 // Tail Call; Jump from runtime stub to Java code.
13642 // Also known as an 'interprocedural jump'.
13643 // Target of jump will eventually return to caller.
13644 // TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
  match(TailCall jump_target method_ptr);
  ins_cost(300);
  // method_ptr is pinned to EBX by its operand class; the callee expects it there.
  format %{ "JMP    $jump_target \t# EBX holds method" %}
  opcode(0xFF, 0x4);  /* Opcode FF /4 -- indirect jump through register */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}
13653
13654
13655 // Tail Jump; remove the return address; jump to target.
13656 // TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // Unlike TailCall, the return address is discarded first (popped into EDX).
  format %{ "POP    EDX\t# pop return address into dummy\n\t"
            "JMP    $jump_target " %}
  opcode(0xFF, 0x4);  /* Opcode FF /4 -- indirect jump through register */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}
13667
13668 // Create exception oop: created by stack-crawling runtime code.
13669 // Created exception is now available to this handler, and is setup
13670 // just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  // Zero-size marker: the exception oop is already in EAX on entry.
  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}
13681
13682
13683 // Rethrow exception:
13684 // The exception oop will come in the first argument position.
13685 // Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // Tail-jump (not call) into the shared rethrow stub; the stub finds the
  // exception oop in the first argument position.
  // use the following format syntax
  format %{ "JMP    rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}
13695
13696 // inlined locking and unlocking
13697
// Fast-path monitor enter with Restricted Transactional Memory support.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  // Selected only when RTM locking is enabled for this compilation.
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  // box is consumed (USE_KILL); tmp/scr/cx1/cx2 are scratch registers.
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    // Delegates to the macro assembler; the trailing args wire up RTM
    // profiling counters and method data for abort-ratio decisions.
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}
13713
// Fast-path monitor enter, non-RTM variant.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  // box is consumed (USE_KILL); tmp/scr are scratch.
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    // noreg/NULL/false arguments disable all RTM-specific machinery.
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}
13726
// Fast-path monitor exit; one rule serves both RTM and non-RTM — the
// macro assembler branches on use_rtm() at code-emission time.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}
13737
13738
13739
13740 // ============================================================================
13741 // Safepoint Instruction
// Safepoint poll: a load-test from the thread-local polling page; the page is
// armed (made unreadable) to trap threads at a safepoint.
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3) -- hence the eRegP_no_EBP operand class above.
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();
    // Sanity-check the 2-byte encoding promised by size(2): 0x85 is TEST r/m32,r32.
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
13759
13760
13761 // ============================================================================
13762 // This name is KNOWN by the ADLC and cannot be changed.
13763 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13764 // for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  // get_thread() clobbers the condition codes, hence KILL cr.
  effect(DEF dst, KILL cr);

  format %{ "MOV    $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}
13776
13777
13778
13779 //----------PEEPHOLE RULES-----------------------------------------------------
13780 // These must follow all instruction definitions as they use the names
13781 // defined in the instructions definitions.
13782 //
13783 // peepmatch ( root_instr_name [preceding_instruction]* );
13784 //
13785 // peepconstraint %{
13786 // (instruction_number.operand_name relational_op instruction_number.operand_name
13787 // [, ...] );
13788 // // instruction numbers are zero-based using left to right order in peepmatch
13789 //
13790 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
13791 // // provide an instruction_number.operand_name for each operand that appears
13792 // // in the replacement instruction's match rule
13793 //
13794 // ---------VM FLAGS---------------------------------------------------------
13795 //
13796 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13797 //
13798 // Each peephole rule is given an identifying number starting with zero and
13799 // increasing by one in the order seen by the parser. An individual peephole
13800 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13801 // on the command-line.
13802 //
13803 // ---------CURRENT LIMITATIONS----------------------------------------------
13804 //
13805 // Only match adjacent instructions in same basic block
13806 // Only equality constraints
13807 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13808 // Only one replacement instruction
13809 //
13810 // ---------EXAMPLE----------------------------------------------------------
13811 //
13812 // // pertinent parts of existing instructions in architecture description
13813 // instruct movI(rRegI dst, rRegI src) %{
13814 // match(Set dst (CopyI src));
13815 // %}
13816 //
13817 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13818 // match(Set dst (AddI dst src));
13819 // effect(KILL cr);
13820 // %}
13821 //
13822 // // Change (inc mov) to lea
13823 // peephole %{
//   // increment preceded by register-register move
13825 // peepmatch ( incI_eReg movI );
13826 // // require that the destination register of the increment
13827 // // match the destination register of the move
13828 // peepconstraint ( 0.dst == 1.dst );
13829 // // construct a replacement instruction that sets
13830 // // the destination to ( move's source register + one )
13831 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13832 // %}
13833 //
13834 // Implementation no longer uses movX instructions since
13835 // machine-independent system no longer uses CopyX nodes.
13836 //
13837 // peephole %{
13838 // peepmatch ( incI_eReg movI );
13839 // peepconstraint ( 0.dst == 1.dst );
13840 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13841 // %}
13842 //
13843 // peephole %{
13844 // peepmatch ( decI_eReg movI );
13845 // peepconstraint ( 0.dst == 1.dst );
13846 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13847 // %}
13848 //
13849 // peephole %{
13850 // peepmatch ( addI_eReg_imm movI );
13851 // peepconstraint ( 0.dst == 1.dst );
13852 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13853 // %}
13854 //
13855 // peephole %{
13856 // peepmatch ( addP_eReg_imm movP );
13857 // peepconstraint ( 0.dst == 1.dst );
13858 // peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13859 // %}
13860
13861 // // Change load of spilled value to only a spill
13862 // instruct storeI(memory mem, rRegI src) %{
13863 // match(Set mem (StoreI mem src));
13864 // %}
13865 //
13866 // instruct loadI(rRegI dst, memory mem) %{
13867 // match(Set dst (LoadI mem));
13868 // %}
13869 //
// Peephole 0: a loadI immediately following a storeI of the same value to the
// same address (instr 0 = loadI, instr 1 = storeI, numbered left-to-right in
// peepmatch) is redundant -- replace the pair with just the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}
13875
13876 //----------SMARTSPILL RULES---------------------------------------------------
13877 // These must follow all instruction definitions as they use the names
13878 // defined in the instructions definitions.