1 //
2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
61
62 // General Registers
63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
66
67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
75 reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());
76
77 // Float registers. We treat TOS/FPR0 special. It is invisible to the
78 // allocator, and only shows up in the encodings.
79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok, so here's the trick: FPR1 is really st(0) except in the midst
82 // of emission of assembly for a machnode. During the emission the fpu stack
83 // is pushed making FPR1 == st(1) temporarily. However at any safepoint
84 // the stack will not have this element so FPR1 == st(0) from the
85 // oopMap viewpoint. This same weirdness with numbering causes
86 // instruction encoding to have to play games with the register
87 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
88 // where it does flt->flt moves to see an example
89 //
90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
104 //
105 // Empty fill registers, which are never used, but supply alignment to xmm regs
106 //
107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
115
116 // Specify priority of register selection within phases of register
117 // allocation. Highest priority is first. A useful heuristic is to
118 // give registers a low priority when they are required by machine
119 // instructions, like EAX and EDX. Registers which are used as
120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
121 // For the Intel integer registers, the equivalent Long pairs are
122 // EDX:EAX, EBX:ECX, and EDI:EBP.
123 alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
124 FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
125 FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
126 FPR6L, FPR6H, FPR7L, FPR7H,
127 FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
128
129
130 //----------Architecture Description Register Classes--------------------------
131 // Several register classes are automatically defined based upon information in
132 // this architecture description.
133 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
135 //
136 // Class for no registers (empty set).
137 reg_class no_reg();
138
139 // Class for all registers
140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
141 // Class for all registers (excluding EBP)
142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
143 // Dynamic register class that selects at runtime between register classes
144 // any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
145 // Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
147
148 // Class for general registers
149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
150 // Class for general registers (excluding EBP).
151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
152 // Used also if the PreserveFramePointer flag is true.
153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
156
157 // Class of "X" registers
158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
159
160 // Class of registers that can appear in an address with no offset.
161 // EBP and ESP require an extra instruction byte for zero offset.
162 // Used in fast-unlock
163 reg_class p_reg(EDX, EDI, ESI, EBX);
164
165 // Class for general registers excluding ECX
166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
167 // Class for general registers excluding ECX (and EBP)
168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
171
172 // Class for general registers excluding EAX
173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
174
175 // Class for general registers excluding EAX and EBX.
176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
177 // Class for general registers excluding EAX and EBX (and EBP)
178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
181
182 // Class of EAX (for multiply and divide operations)
183 reg_class eax_reg(EAX);
184
185 // Class of EBX (for atomic add)
186 reg_class ebx_reg(EBX);
187
188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
189 reg_class ecx_reg(ECX);
190
191 // Class of EDX (for multiply and divide operations)
192 reg_class edx_reg(EDX);
193
194 // Class of EDI (for synchronization)
195 reg_class edi_reg(EDI);
196
197 // Class of ESI (for synchronization)
198 reg_class esi_reg(ESI);
199
200 // Singleton class for stack pointer
201 reg_class sp_reg(ESP);
202
203 // Singleton class for instruction pointer
204 // reg_class ip_reg(EIP);
205
206 // Class of integer register pairs
207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI).
209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
212
213 // Class of integer register pairs that aligns with calling convention
214 reg_class eadx_reg( EAX,EDX );
215 reg_class ebcx_reg( ECX,EBX );
216
217 // Not AX or DX, used in divides
218 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
219 // Not AX or DX (and neither EBP), used in divides
220 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
221 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
222 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
223
224 // Floating point registers. Notice FPR0 is not a choice.
225 // FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instruction out of Intel's FP stack.
227 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
228
229 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
230 FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
231 FPR7L,FPR7H );
232
233 reg_class fp_flt_reg0( FPR1L );
234 reg_class fp_dbl_reg0( FPR1L,FPR1H );
235 reg_class fp_dbl_reg1( FPR2L,FPR2H );
236 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
237 FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
238
239 %}
240
241
242 //----------SOURCE BLOCK-------------------------------------------------------
243 // This is a block of C++ code which provides values, functions, and
244 // definitions necessary in the rest of the architecture description
245 source_hpp %{
246 // Must be visible to the DFA in dfa_x86_32.cpp
247 extern bool is_operand_hi32_zero(Node* n);
248 %}
249
250 source %{
251 #define RELOC_IMM32 Assembler::imm_operand
252 #define RELOC_DISP32 Assembler::disp32_operand
253
254 #define __ _masm.
255
256 // How to find the high register of a Long pair, given the low register
257 #define HIGH_FROM_LOW(x) ((x)+2)
258
259 // These masks are used to provide 128-bit aligned bitmasks to the XMM
260 // instructions, to allow sign-masking or sign-bit flipping. They allow
261 // fast versions of NegF/NegD and AbsF/AbsD.
262
// One-time initialization of platform-dependent register masks, run during
// VM startup before the register allocator is used.
void reg_mask_init() {
  if (Matcher::has_predicated_vectors()) {
    // Post-loop multi-versioning expects the mask to be present in the K1
    // register. Until that is fixed, the register allocator should not
    // allocate K1; removing it from the mask here prevents any accidental
    // corruption of the value held in K1.
    if (PostLoopMultiversioning) {
      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
    }
  }
}
274
275 // Note: 'double' and 'long long' have 32-bits alignment on x86.
276 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
277 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
278 // of 128-bits operands for SSE instructions.
279 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
280 // Store the value to a 128-bits operand.
281 operand[0] = lo;
282 operand[1] = hi;
283 return operand;
284 }
285
// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each mask is carved 16-byte aligned out of fp_signmask_pool by
// double_quadword(); the extra fifth slot absorbs the alignment slack.
// 0x7FFF... masks preserve all bits but the sign (sign-masking, Abs);
// 0x8000... masks have only the sign bits set (sign-flipping, Neg).
static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
294
// Offset hacking within calls.
//
// Number of bytes of "reset" instructions emitted just before a call:
// an FLDCW to restore the standard FPU control word when this compilation
// runs in 24-bit precision mode, plus a VZEROUPPER when the CPU supports it.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}
307
// !!!!! Special hack to get all types of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  // 10 = 5-byte MOV (inline-cache setup) + 5-byte call; the pre-call
  // resets (fldcw/vzeroupper) precede both.
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}
318
// Size in bytes of the code that calls FFree_Float_Stack_All; recorded when
// that code is first emitted (-1 until then, guarded by the assert below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  // 5-byte call, preceded by the pre-call resets and, unless this is a leaf
  // call with no FP use, by the FFree_Float_Stack_All sequence.
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}
325
int MachCallNativeNode::ret_addr_offset() {
  // Not supported on this platform; reaching here is a bug.
  ShouldNotCallThis();
  return -1;
}
330
331 //
332 // Compute padding required for nodes which need alignment
333 //
334
335 // The address of the call instruction needs to be 4-byte aligned to
336 // ensure that it does not span a cache line so that it can be patched.
337 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
338 current_offset += pre_call_resets_size(); // skip fldcw, if any
339 current_offset += 1; // skip call opcode byte
340 return align_up(current_offset, alignment_required()) - current_offset;
341 }
342
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
// Same as the static variant, but also accounts for the 5-byte MOV that
// precedes the call in the dynamic-call sequence.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5; // skip MOV instruction
  current_offset += 1; // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}
351
352 // EMIT_RM()
353 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
354 unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
355 cbuf.insts()->emit_int8(c);
356 }
357
358 // EMIT_CC()
359 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
360 unsigned char c = (unsigned char)( f1 | f2 );
361 cbuf.insts()->emit_int8(c);
362 }
363
364 // EMIT_OPCODE()
365 void emit_opcode(CodeBuffer &cbuf, int code) {
366 cbuf.insts()->emit_int8((unsigned char) code);
367 }
368
// EMIT_OPCODE() w/ relocation information
// Records the relocation at insts_mark()+offset first, then emits the opcode
// byte (the relocation must reference the instruction's start, not the byte
// being emitted).
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}
374
375 // EMIT_D8()
376 void emit_d8(CodeBuffer &cbuf, int d8) {
377 cbuf.insts()->emit_int8((unsigned char) d8);
378 }
379
380 // EMIT_D16()
381 void emit_d16(CodeBuffer &cbuf, int d16) {
382 cbuf.insts()->emit_int16(d16);
383 }
384
385 // EMIT_D32()
386 void emit_d32(CodeBuffer &cbuf, int d32) {
387 cbuf.insts()->emit_int32(d32);
388 }
389
// emit 32 bit value and construct relocation entry from relocInfo::relocType
// The relocation is recorded at the current insts_mark (the instruction
// start) before the value is emitted; 'format' selects the reloc format.
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
                    int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}
396
// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
                    int format) {
#ifdef ASSERT
  // Sanity check: any non-null, non-sentinel value carried by an oop reloc
  // must actually be a valid oop.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}
408
// Access stack slot for load or store
// Emits 'opcode' followed by an [ESP+disp] memory operand. ESP-based
// addressing always requires a SIB byte; an 8-bit displacement form
// (mod=01) is used when disp fits in a signed byte, otherwise the
// 32-bit form (mod=10).
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
    emit_d8 (cbuf, disp); // Displacement // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
    emit_d32(cbuf, disp); // Displacement // R/M byte
  }
}
422
// rRegI ereg, memory mem) %{ // emit_reg_mem
// Encode a register+memory operand pair as ModR/M (+ optional SIB and
// displacement) bytes. 'reg_encoding' fills the reg field; the memory
// operand is described by base/index/scale/displace. index == 0x4 means
// "no index" (0x4 cannot be an index in SIB encoding); base == -1 requests
// absolute (disp32-only) addressing. 'disp_reloc' attaches relocation info
// to a 32-bit displacement when needed.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    // (mod=00 with base EBP means disp32-absolute, so EBP needs disp8)
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else { // If 8-bit displacement, mode 0x1
      // (a relocated displacement must be 32 bits wide)
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else { // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else { // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) { // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}
489
490
491 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
492 if( dst_encoding == src_encoding ) {
493 // reg-reg copy, use an empty encoding
494 } else {
495 emit_opcode( cbuf, 0x8B );
496 emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
497 }
498 }
499
// Post-compare fixup: after a comiss/ucomiss, rewrite the flags so that an
// unordered result (PF set, i.e. a NaN operand) reads as 'less than'.
// Falls straight through when PF is clear (ordered compare).
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}
519
// Materialize a three-way FP compare result in 'dst' from the flags:
// -1 for 'below' or unordered (NaN, PF set), 0 for equal, 1 for above.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);                      // default: less-than / NaN
  __ jcc(Assembler::parity, done);       // unordered -> keep -1
  __ jcc(Assembler::below, done);        // below     -> keep -1
  __ setb(Assembler::notEqual, dst);     // 0 if equal, 1 if above
  __ movzbl(dst, dst);                   // zero-extend the byte result
  __ bind(done);
}
529
530
531 //=============================================================================
532 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
533
534 int ConstantTable::calculate_table_base_offset() const {
535 return 0; // absolute addressing, no offset
536 }
537
538 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
539 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
540 ShouldNotReachHere();
541 }
542
543 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
544 // Empty encoding
545 }
546
547 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
548 return 0;
549 }
550
551 #ifndef PRODUCT
552 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
553 st->print("# MachConstantBaseNode (empty encoding)");
554 }
555 #endif
556
557
558 //=============================================================================
559 #ifndef PRODUCT
560 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
561 Compile* C = ra_->C;
562
563 int framesize = C->output()->frame_size_in_bytes();
564 int bangsize = C->output()->bang_size_in_bytes();
565 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
566 // Remove wordSize for return addr which is already pushed.
567 framesize -= wordSize;
568
569 if (C->output()->need_stack_bang(bangsize)) {
570 framesize -= wordSize;
571 st->print("# stack bang (%d bytes)", bangsize);
572 st->print("\n\t");
573 st->print("PUSH EBP\t# Save EBP");
574 if (PreserveFramePointer) {
575 st->print("\n\t");
576 st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
577 }
578 if (framesize) {
579 st->print("\n\t");
580 st->print("SUB ESP, #%d\t# Create frame",framesize);
581 }
582 } else {
583 st->print("SUB ESP, #%d\t# Create frame",framesize);
584 st->print("\n\t");
585 framesize -= wordSize;
586 st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
587 if (PreserveFramePointer) {
588 st->print("\n\t");
589 st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
590 if (framesize > 0) {
591 st->print("\n\t");
592 st->print("ADD EBP, #%d", framesize);
593 }
594 }
595 }
596
597 if (VerifyStackAtCalls) {
598 st->print("\n\t");
599 framesize -= wordSize;
600 st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
601 }
602
603 if( C->in_24_bit_fp_mode() ) {
604 st->print("\n\t");
605 st->print("FLDCW \t# load 24 bit fpu control word");
606 }
607 if (UseSSE >= 2 && VerifyFPU) {
608 st->print("\n\t");
609 st->print("# verify FPU stack (must be clean on entry)");
610 }
611
612 #ifdef ASSERT
613 if (VerifyStackAtCalls) {
614 st->print("\n\t");
615 st->print("# stack alignment check");
616 }
617 #endif
618 st->cr();
619 }
620 #endif
621
622
// Emit the method prolog. The heavy lifting (frame push, stack bang,
// 24-bit FPU mode setup) is delegated to MacroAssembler::verified_entry.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}
649
650 //=============================================================================
651 #ifndef PRODUCT
652 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
653 Compile *C = ra_->C;
654 int framesize = C->output()->frame_size_in_bytes();
655 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
656 // Remove two words for return addr and rbp,
657 framesize -= 2*wordSize;
658
659 if (C->max_vector_size() > 16) {
660 st->print("VZEROUPPER");
661 st->cr(); st->print("\t");
662 }
663 if (C->in_24_bit_fp_mode()) {
664 st->print("FLDCW standard control word");
665 st->cr(); st->print("\t");
666 }
667 if (framesize) {
668 st->print("ADD ESP,%d\t# Destroy frame",framesize);
669 st->cr(); st->print("\t");
670 }
671 st->print_cr("POPL EBP"); st->print("\t");
672 if (do_polling() && C->is_method_compilation()) {
673 st->print("CMPL rsp, poll_offset[thread] \n\t"
674 "JA #safepoint_stub\t"
675 "# Safepoint: poll for GC");
676 }
677 }
678 #endif
679
// Emit the method epilog: optional vzeroupper and FPU control-word restore,
// frame teardown (ADD ESP / POP EBP), reserved-stack check, and the
// return-polling safepoint when required.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  // ADD ESP, #framesize: 0x81 takes a 32-bit immediate, 0x83 a sign-extended
  // 8-bit immediate (valid for framesize < 128).
  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  // POP EBP (0x58 + reg encoding)
  emit_opcode(cbuf, 0x58 | EBP_enc);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    MacroAssembler masm(&cbuf);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Real emission: register a safepoint-poll stub for the slow path.
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
745
746 //=============================================================================
747
748 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
749 static enum RC rc_class( OptoReg::Name reg ) {
750
751 if( !OptoReg::is_valid(reg) ) return rc_bad;
752 if (OptoReg::is_stack(reg)) return rc_stack;
753
754 VMReg r = OptoReg::as_VMReg(reg);
755 if (r->is_Register()) return rc_int;
756 if (r->is_FloatRegister()) {
757 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
758 return rc_float;
759 }
760 if (r->is_KRegister()) return rc_kreg;
761 assert(r->is_XMMRegister(), "must be");
762 return rc_xmm;
763 }
764
// Spill/fill helper for a single [ESP+offset] access: emits the instruction
// when cbuf != NULL, prints it when cbuf == NULL and !do_size, and always
// returns 'size' plus the encoded length (opcode + ModR/M + SIB + disp).
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  // 3 bytes (opcode + ModR/M + SIB) plus 0/1/4 displacement bytes.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}
784
// Helper for XMM registers. Extra opcode bits, limited syntax.
// Spill/fill between [ESP+offset] and an XMM register. reg_lo+1 == reg_hi
// indicates a 64-bit (double) move, otherwise a 32-bit (float) move.
// Emits when cbuf != NULL, prints when cbuf == NULL and !do_size, and
// returns 'size' plus the estimated encoded length in bytes.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                              Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                      offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                              Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                      offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    // EVEX compressed disp8*N may shrink the displacement to one byte.
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}
843
844
// XMM register to XMM register copy.  An adjacent register pair
// (lo+1 == hi on both sides) is a 64-bit (double) move, otherwise a
// 32-bit (float) move.  Returns the running code size.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}
883
// Copy a 32-bit value from a general register into an XMM register (MOVD).
// NOTE: the incoming 'size' is not accumulated into the return value; the
// single call site returns this helper's result directly after asserting
// there is no second word to move.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  // MOVD is 4 bytes; 6 with an EVEX prefix.
  return (UseAVX> 2) ? 6 : 4;
}
900
901
// Copy a 32-bit value from an XMM register into a general register (MOVD).
// Mirror image of impl_movgpr2x_helper above; same size-return convention.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  // MOVD is 4 bytes; 6 with an EVEX prefix.
  return (UseAVX> 2) ? 6 : 4;
}
918
// Integer register to register copy: MOV r32,r32 (0x8B /r), 2 bytes.
// Returns the running code size.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}
931
// Store an x87 floating point register to a stack slot at [ESP + offset].
// If the source is not already at the top of the FP stack it is first
// pushed with FLD and then stored-and-popped (FSTP); FPR1 itself is stored
// without popping (FST).  Returns the running code size.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // The 'reg' field of the mod/rm byte selects the FP store variant; the
  // EBX/EDX encodings (3 and 2) correspond to store-and-pop vs. plain store.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}
961
962 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
963 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
964 int src_hi, int dst_hi, uint ireg, outputStream* st);
965
966 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
967 int stack_offset, int reg, uint ireg, outputStream* st);
968
// Copy a vector spill slot to another stack slot.  32/64-bit vectors are
// moved through PUSH/POP; wider vectors go through xmm0, which is first
// preserved in (and afterwards restored from) the scratch area just below
// the stack pointer.  When cbuf is NULL this only prints the assembly.
static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    switch (ireg) {
    case Op_VecS:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD:
      // Two 32-bit halves; NOTE(review): offsets are not adjusted for the
      // ESP shift the intervening push causes — presumably PUSH/POP to
      // memory operands here balance within each pair; confirm against the
      // 64-bit variant before changing.
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX:
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY:
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushl [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu xmm0, [rsp + #%d]\n\t"
                "movdqu [rsp + #%d], xmm0\n\t"
                "movdqu xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      // NOTE(review): debug text says "vmovdqu" while the emitted
      // instruction above is evmovdquq.
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}
1047
// Emit (cbuf != NULL), pretty-print (cbuf == NULL and !do_size), or size
// (do_size) a spill copy between any combination of stack slots, integer
// registers, x87 float registers, XMM registers and AVX-512 opmask
// registers.  Returns the emitted code size in bytes; the vector and kreg
// paths return 0 because their helpers do their own size bookkeeping.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size; // Self copy, no move

  // Vector spills (excluding vector masks) are handled entirely by the
  // shared helpers defined in x86.ad.
  if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }

  // --------------------------------------
  // Check for mem-mem move. push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      // Copy the high word first so the low-word copy below cannot
      // clobber it (dst_first overlaps src_second).
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( src_first_rc == rc_stack && dst_first_rc == rc_int )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode (*cbuf, 0xD9 ); // FLD ST(i)
        emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
        emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode (*cbuf, 0xDD ); // FST ST(i)
        emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
      emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    // Spill the x87 value to the freshly reserved slot at [ESP+0] ...
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    // ... then release the temp slot, again via LEA to keep flags intact.
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // AVX-512 opmask specific spilling.
  if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    int offset = ra_->reg2offset(src_first);
    __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    int offset = ra_->reg2offset(dst_first);
    __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
    return 0;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size; // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  Unimplemented();
  return 0; // Mute compiler
}
1315
#ifndef PRODUCT
// Debug-only: print the assembly for this spill copy without emitting code
// (cbuf == NULL selects the printing path inside implementation()).
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif
1321
// Emit the spill copy into the code buffer (no printing: st == NULL).
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}
1325
// Size of the spill copy: defer to the generic MachNode computation.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1329
1330
1331 //=============================================================================
#ifndef PRODUCT
// Debug-only: a BoxLock materializes the address of its stack slot via LEA.
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif
1339
// Emit LEA reg,[ESP+offset].  Offsets >= 128 need the 32-bit displacement
// form (mod == 0x2); smaller offsets use the 8-bit form (mod == 0x1).
// The two sizes must agree with BoxLockNode::size() below.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}
1356
1357 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1358 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1359 if( offset >= 128 ) {
1360 return 7;
1361 }
1362 else {
1363 return 4;
1364 }
1365 }
1366
1367 //=============================================================================
#ifndef PRODUCT
// Debug-only: the unverified entry point compares the cached klass in EAX
// with the receiver's klass and branches to the IC-miss stub on mismatch;
// the trailing NOPs pad out to the fixed entry size (see emit()/size()).
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif
1378
// Emit the inline-cache check: compare the expected klass (EAX) against the
// receiver's klass loaded from [ECX + klass offset], and jump to the shared
// IC-miss stub when they differ.  NOP padding keeps the verified entry
// point at a fixed, patchable position.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}
1396
// Fixed size of the UEP; verified against the actual emission by the
// assert at the end of MachUEPNode::emit().
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}
1400
1401
1402 //=============================================================================
1403
// Vector calling convention not supported.
// (Consequently vector_return_value() below must never be reached.)
const bool Matcher::supports_vector_calling_convention() {
  return false;
}
1408
// Unreachable on this platform: supports_vector_calling_convention()
// returns false, so no vector ever travels in the return convention.
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  Unimplemented();
  return OptoRegPair(0, 0);
}
1413
1414 // Is this branch offset short enough that a short branch can be used?
1415 //
1416 // NOTE: If the platform does not provide any short branch variants, then
1417 // this method should return false for offset 0.
1418 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1419 // The passed offset is relative to address of the branch.
1420 // On 86 a branch displacement is calculated relative to address
1421 // of a next instruction.
1422 offset -= br_size;
1423
1424 // the short version of jmpConUCF2 contains multiple branches,
1425 // making the reach slightly less
1426 if (rule == jmpConUCF2_rule)
1427 return (-126 <= offset && offset <= 125);
1428 return (-128 <= offset && offset <= 127);
1429 }
1430
1431 // Return whether or not this register is ever used as an argument. This
1432 // function is used on startup to build the trampoline stubs in generateOptoStub.
1433 // Registers not mentioned will be killed by the VM call in the trampoline, and
1434 // arguments in those registers not be available to the callee.
1435 bool Matcher::can_be_java_arg( int reg ) {
1436 if( reg == ECX_num || reg == EDX_num ) return true;
1437 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
1438 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1439 return false;
1440 }
1441
// A register is spillable as an argument exactly when it can carry a Java
// argument (see can_be_java_arg above).
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}
1445
1446 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1447 // Use hardware integer DIV instruction when
1448 // it is faster than a code which use multiply.
1449 // Only when constant divisor fits into 32 bit
1450 // (min_jint is excluded to get only correct
1451 // positive 32 bit values from negative).
1452 return VM_Version::has_fast_idiv() &&
1453 (divisor == (int)divisor && divisor != min_jint);
1454 }
1455
// Register for DIVI projection of divmodI
// (x86 IDIV leaves the quotient in EAX).
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}
1460
// Register for MODI projection of divmodI
// (x86 IDIV leaves the remainder in EDX).
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}
1465
// Register for DIVL projection of divmodL
// Never used on 32-bit x86 (no divmodL node here), hence unreachable.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
1471
// Register for MODL projection of divmodL
// Never used on 32-bit x86 (no divmodL node here), hence unreachable.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
1477
// No register is reserved for saving SP around method-handle invokes
// on this platform.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}
1481
1482 // Returns true if the high 32 bits of the value is known to be zero.
1483 bool is_operand_hi32_zero(Node* n) {
1484 int opc = n->Opcode();
1485 if (opc == Op_AndL) {
1486 Node* o2 = n->in(2);
1487 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1488 return true;
1489 }
1490 }
1491 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1492 return true;
1493 }
1494 return false;
1495 }
1496
1497 %}
1498
1499 //----------ENCODING BLOCK-----------------------------------------------------
1500 // This block specifies the encoding classes used by the compiler to output
1501 // byte streams. Encoding classes generate functions which are called by
1502 // Machine Instruction Nodes in order to generate the bit encoding of the
1503 // instruction. Operands specify their base encoding interface with the
// interface keyword.  There are currently four supported interfaces:
1505 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
1506 // operand to generate a function which returns its register number when
1507 // queried. CONST_INTER causes an operand to generate a function which
1508 // returns the value of the constant when queried. MEMORY_INTER causes an
1509 // operand to generate four functions which return the Base Register, the
1510 // Index Register, the Scale Value, and the Offset Value of the operand when
1511 // queried. COND_INTER causes an operand to generate six functions which
1512 // return the encoding code (ie - encoding bits for the instruction)
1513 // associated with each basic boolean condition for a conditional instruction.
1514 // Instructions specify two basic values for encoding. They use the
1515 // ins_encode keyword to specify their encoding class (which must be one of
1516 // the class names specified in the encoding block), and they use the
1517 // opcode keyword to specify, in order, their primary, secondary, and
1518 // tertiary opcode. Only the opcode sections which a particular instruction
1519 // needs for encoding need to be specified.
1520 encode %{
1521 // Build emit functions for each basic byte or larger field in the intel
1522 // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1523 // code in the enc_class source block. Emit functions will live in the
1524 // main source block for now. In future, we can generalize this by
1525 // adding a syntax that specifies the sizes of fields in an order,
1526 // so that the adlc can build the emit functions automagically
1527
  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix (selects 16-bit operands)
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // Emit a register-register mod/rm byte (mod == 0x3)
  enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Emit an explicit opcode byte followed by a register-register mod/rm byte
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Load immediate zero into a 32-bit register: MOV r32,imm32 with imm32 == 0
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( cbuf, 0x0 ); // imm32==0x0
  %}
1560
  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg: divisor                           -1
    //
    // output: rax,: quotient  (= rax, idiv reg)       min_int
    //         rdx: remainder (= rax, irem reg)       0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80    cmp         rax,80000000h
    // 0F 85 0B 00 00 00    jne         normal_case
    // 33 D2                xor         rdx,edx
    // 83 F9 FF             cmp         rcx,0FFh
    // 0F 84 03 00 00 00    je          done
    //                  normal_case:
    // 99                   cdq
    // F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    // Register number is folded into the low bits of the opcode byte.
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}
1606
1607
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02); }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  // Emit the immediate itself in 8 or 32 bits; must pair with an OpcSE*
  // class above so the opcode's sign-extend bit matches the width chosen.
  enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else { // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Low word of a long immediate operation: opcode + mod/rm + 8/32-bit imm.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // High word of a long immediate operation; uses the tertiary opcode and
  // the register pair's high half.
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}
1663
  // Emit $secondary opcode with the register number folded into its low bits.
  enc_class OpcSReg (rRegI dst) %{ // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-swap a 64-bit value held in a register pair: BSWAP each half,
  // then XCHG the halves so the bytes land in the correct registers.
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  // Register-direct mod/reg/rm byte only; $secondary selects the operation
  // (the opcode byte is emitted by the matching instruct rule).
  enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  // CMOVcc: $primary is the escape byte, condition folded into $secondary.
  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // x87 conditional move: builds the two-byte 0xDAxx opcode from the
  // condition code and the FPU stack slot of src.
  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}
1703
  // Slow-path partial subtype check.  Register roles are fixed by the
  // matching instruct rules: EAX = super class, ESI = sub class,
  // ECX is killed, EDI receives the result.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      // $primary selects the variant that zeroes the result on the hit path.
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty. Do cleanup now.
      masm.empty_FPU_stack();
    }
    // Record the emitted size the first time; every later emission of this
    // encoding must produce exactly the same number of bytes.
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  // Optional FPU-stack sanity check after returning from a runtime leaf call.
  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}
1746
  enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime; displacement is relative to the end of
    // the 4-byte immediate.
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused. In SSE2+
        // mode the result needs to be removed from the FPU stack. It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        // Move the x87 float return value into xmm0 via a 4-byte stack slot.
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        // Move the x87 double return value into xmm0 via an 8-byte stack slot.
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}

  // Fixed-size preamble emitted before calls; size is asserted against
  // pre_call_resets_size() so call-site offsets stay predictable.
  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.insts_size());
    if (ra_->C->in_24_bit_fp_mode()) {
      MacroAssembler _masm(&cbuf);
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    MacroAssembler _masm(&cbuf);
    __ vzeroupper();
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}

  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    }
  %}
1801
  enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL
    // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;

    if (!_method) {
      // Runtime stub target: plain runtime-call relocation, no interp stub.
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_IMM32);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     rspec, RELOC_DISP32);
      // Emit stubs for static call.
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        // Bail out of compilation; the code cache cannot hold the stub.
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
  %}

  // Inline-cache call; MacroAssembler emits the IC load and the call with
  // its virtual-call relocation.
  enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
  %}

  enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    // The 8-bit displacement form is used below, so the offset must fit.
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte
    emit_d8(cbuf, disp); // Displacement

  %}
1843
1844 // Following encoding is no longer used, but may be restored if calling
1845 // convention changes significantly.
1846 // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1847 //
1848 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL
1849 // // int ic_reg = Matcher::inline_cache_reg();
1850 // // int ic_encode = Matcher::_regEncode[ic_reg];
1851 // // int imo_reg = Matcher::interpreter_method_reg();
1852 // // int imo_encode = Matcher::_regEncode[imo_reg];
1853 //
1854 // // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
1855 // // // so we load it immediately before the call
1856 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_ptr
1857 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1858 //
1859 // // xor rbp,ebp
1860 // emit_opcode(cbuf, 0x33);
1861 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1862 //
1863 // // CALL to interpreter.
1864 // cbuf.set_insts_mark();
1865 // $$$emit8$primary;
1866 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1867 // runtime_call_Relocation::spec(), RELOC_IMM32 );
1868 // %}
1869
  // Shift by 8-bit immediate: primary opcode, register-direct mod/reg/rm
  // with the $secondary extension, then the count byte.
  enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  // MOV reg,imm32 using the short 0xB8+reg opcode form.
  enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Same as LdImmI but the base opcode comes from the rule's $primary.
  enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load the low 32 bits of a long immediate; uses XOR for zero.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load the high 32 bits of a long immediate; uses XOR for zero.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    // NOTE(review): the high half is addressed as lo+2 here rather than via
    // HIGH_FROM_LOW as elsewhere — presumably equivalent; confirm.
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}
1919
1920
  // Encode a reg-reg copy. If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Copy the low word of a long pair into an integer register.
  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Register-direct mod/reg/rm byte only; opcode emitted by the rule.
  enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Low-word half of a long reg-reg op: $primary opcode + mod/reg/rm.
  enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // High-word half: $secondary opcode on the high registers of both pairs.
  enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // Low-word mod/reg/rm only (opcode emitted separately by the rule).
  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // High-word mod/reg/rm only (opcode emitted separately by the rule).
  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // Integer dst operand paired with the high word of long src.
  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  enc_class Con32 (immI src) %{ // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{ // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{ // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  // Raw 32-bit immediate with no 8/32 size selection.
  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{ // Con32(storeImmI)
    // Output immediate memory reference: mod=00, rm=101 is the
    // disp32-only addressing form; displacement is patched later.
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  // LOCK prefix for atomic read-modify-write instructions.
  enc_class lock_prefix( ) %{
    emit_opcode(cbuf,0xF0); // [Lock]
  %}
1993
  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr] (0F C7 /1)
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG rbx,ecx — restore the swapped registers
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  // Atomic compare-and-swap of a 32-bit value at [mem_ptr].
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Atomic compare-and-swap of a byte at [mem_ptr].
  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHGB [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB0);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Atomic compare-and-swap of a 16-bit value at [mem_ptr].
  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // 16-bit mode (0x66 operand-size prefix)
    emit_opcode(cbuf, 0x66);

    // CMPXCHGW [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}
2047
  // Materialize the Z flag as a 0/1 boolean in res.  MOV reg,imm does not
  // modify flags, so the first MOV is safe before the conditional jump.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s fail — skip the 5-byte MOV res,1 below
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand
  %}

  // Generic register-with-memory-operand addressing-mode encoding.
  enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}
2076
  // Like RegMem, but addresses the high 32-bit half of a long in memory
  // (disp+4) and pairs it with the high register of the pair.
  enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg); // Hi register of pair, computed from lo
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4; // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Long shift by constant 1..31: a double-precision shift ($tertiary,
  // 0xA4 = SHLD) moves bits across the word boundary, then a plain shift
  // ($primary/$secondary) finishes the other half.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); }
    else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F); // SHLD/SHRD
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary); // plain shift of the remaining half
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic long shift right by 32..63: hi moves to lo, lo is shifted
  // by (cnt-32) if nonzero, then hi is shifted by 31 to sign-fill it.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary); // shift hi by 31 (sign fill)
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical long shift by 32..63: move one half into the other, shift it
  // by (cnt-32) if nonzero, and clear the vacated half with XOR.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); }
    else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33); // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}
2128
  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant; // selects low/high half
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  // Same as RMopc_Mem_no_oop but the displacement may carry an oop reloc.
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  // LEA-style encoding: base register plus constant displacement only.
  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base = $src0$$reg; // 0xFFFFFFFF indicates no base
    int index = 0x04; // 0x04 indicates no index
    int scale = 0x00; // 0x00 indicates no scale
    int displace = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}
2176
  // dst = min(dst,src): compare, then branch over the 2-byte MOV.
  enc_class min_enc (rRegI dst, rRegI src) %{ // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move (JL,s +2)
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // dst = max(dst,src): compare, then branch over the 2-byte MOV.
  enc_class max_enc (rRegI dst, rRegI src) %{ // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move (JG,s +2)
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3; // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // Two's-complement negate: NEG r/m32 is F7 /3.
  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  // SETL: set dst byte to 1 if the last comparison was signed less-than.
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branchless cadd_cmpLT: p = (p - q) + ((p < q) ? y : 0).
  // SBB after the SUB turns the borrow into an all-ones/zero mask in tmp.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}
2249
  // Variable long shift left by ECX.  If the count is >= 32, the low word
  // is first moved into the high word and the low word cleared; the final
  // SHLD/SHL pair then handles the (count mod 32) part.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Variable long logical shift right by ECX; mirror image of
  // shift_left_long (hi moves to lo, hi cleared, then SHRD/SHR).
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // Variable long arithmetic shift right by ECX; like shift_right_long
  // but the high word is sign-filled with SAR hi,31 (hence the 5-byte skip).
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}
2322
2323
  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1)
  %}

  // Multiply dst by the first strict-fp bias constant (FLD m80real of the
  // stub-routine constant, then FMULP into dst); paired with strictfp_bias2.
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB ); // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Multiply dst by the second strict-fp bias constant; undoes the scaling
  // applied by strictfp_bias1.
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB ); // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}
2363
  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
    emit_d32(cbuf, $dst$$disp); // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  // Store TOS into FPU register dst and pop the FPU stack.
  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Push FPU register dst onto the FPU stack.
  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02; // plain FST when src is already at TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03; // FSTP: pop the copy we just pushed
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02; // plain FST when src is already at TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03; // FSTP: pop the copy we just pushed
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8; // FSTP form, pops the pushed copy
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i)
  %}
2436
2437
  // Load dst to TOS; if src is not already FPR1, rotate the FPU stack
  // (fincstp / FXCH / fdecstp) so that src ends up where FPR1 was.
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Push both XMM doubles onto the x87 stack (src1 first, then src0)
  // through an 8-byte scratch slot carved out below RSP.
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Float variant of Push_ModD_encoding (4-byte scratch slot).
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the x87 TOS double into dst XMM register and release the 8-byte
  // scratch slot reserved by the matching Push_* encoding.
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Pop the x87 TOS float into dst XMM register; d8 is the number of
  // scratch bytes to release.
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  // Push a single XMM double onto the x87 stack via an 8-byte scratch slot.
  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Reserve an 8-byte scratch slot on the stack.
  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  // Release the 8-byte scratch slot.
  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Push an XMM double onto the x87 stack through [rsp].
  // NOTE(review): assumes a temp slot was already reserved at [rsp]
  // (see push_stack_temp_qword) — confirm at the use sites.
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}
2510
2511 enc_class Push_Result_Mod_DPR( regDPR src) %{
2512 if ($src$$reg != FPR1L_enc) {
2513 // fincstp
2514 emit_opcode (cbuf, 0xD9);
2515 emit_opcode (cbuf, 0xF7);
2516 // FXCH FPR1 with src
2517 emit_opcode(cbuf, 0xD9);
2518 emit_d8(cbuf, 0xC8-1+$src$$reg );
2519 // fdecstp
2520 emit_opcode (cbuf, 0xD9);
2521 emit_opcode (cbuf, 0xF6);
2522 }
2523 // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2524 // // FSTP FPR$dst$$reg
2525 // emit_opcode( cbuf, 0xDD );
2526 // emit_d8( cbuf, 0xD8+$dst$$reg );
2527 %}
2528
2529 enc_class fnstsw_sahf_skip_parity() %{
2530 // fnstsw ax
2531 emit_opcode( cbuf, 0xDF );
2532 emit_opcode( cbuf, 0xE0 );
2533 // sahf
2534 emit_opcode( cbuf, 0x9E );
2535 // jnp ::skip
2536 emit_opcode( cbuf, 0x7B );
2537 emit_opcode( cbuf, 0x05 );
2538 %}
2539
2540 enc_class emitModDPR() %{
2541 // fprem must be iterative
2542 // :: loop
2543 // fprem
2544 emit_opcode( cbuf, 0xD9 );
2545 emit_opcode( cbuf, 0xF8 );
2546 // wait
2547 emit_opcode( cbuf, 0x9b );
2548 // fnstsw ax
2549 emit_opcode( cbuf, 0xDF );
2550 emit_opcode( cbuf, 0xE0 );
2551 // sahf
2552 emit_opcode( cbuf, 0x9E );
2553 // jp ::loop
2554 emit_opcode( cbuf, 0x0F );
2555 emit_opcode( cbuf, 0x8A );
2556 emit_opcode( cbuf, 0xF4 );
2557 emit_opcode( cbuf, 0xFF );
2558 emit_opcode( cbuf, 0xFF );
2559 emit_opcode( cbuf, 0xFF );
2560 %}
2561
  // Move FPU compare status into EFLAGS, folding the unordered (NaN) case
  // into the "less than" outcome by forcing the carry flag.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400 - check the C2/unordered status bit
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison) - skip the 2-byte mov below
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}
2584
  // After a P6 FUCOMI-style compare, patch EFLAGS so an unordered result
  // (NaN, signalled by the parity flag) is treated as "less than".
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    // displacement 3 skips the mov ah,1 (2 bytes) and sahf (1 byte) below
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}
2599
  // Materialize a float-compare result as an int in $dst:
  //   -1 for less-than and for unordered (NaN), 0 for equal, +1 for greater
  // (Java fcmpl semantics).  NOTE(review): the original pseudo-code comments
  // listed less_result = 1 and greater_result = -1; the emitted code below
  // clearly produces -1 for "below" and +1 as the fall-through "greater".
  //
  // fnstsw_ax();
  // sahf();
  // movl(dst, nan_result);       // nan_result     = -1
  // jcc(Assembler::parity, exit);
  // movl(dst, less_result);      // less_result    = -1
  // jcc(Assembler::below, exit);
  // movl(dst, equal_result);     // equal_result   = 0
  // jcc(Assembler::equal, exit);
  // movl(dst, greater_result);   // greater_result = 1

  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);   (B8+reg, imm32)
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    // displacement 0x13 = 19 bytes = the remaining three 5-byte movs
    // plus two 2-byte jcc's below
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);  skip the last 12 bytes
    emit_opcode( cbuf, 0x72 );
    emit_d8    ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);  skip the last 5-byte mov
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}
2643
2644
  // Compare the longs and set flags
  // BROKEN! Do Not use as-is -- kept only for reference.  Comparing the high
  // words first and conditionally the low words does not produce flags that
  // are correct for all signed long orderings.
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}
2659
  // Sign-extend a 32-bit int into a 64-bit long register pair:
  // copy src into both halves, then arithmetic-shift the high half by 31
  // so it holds the sign bits.
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31  (C1 /7 ib = SAR r32, imm8)
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}
2672
  // Convert a long to double: push the 64-bit value onto the CPU stack,
  // FILD it onto the FPU stack, then pop the two words back off ESP.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}
2688
  // High-word multiply: IMUL EDX:EAX by $src1, keep only the high half (EDX)
  // and arithmetic-shift it right by ($cnt - 32).  $cnt is 32..63, so a
  // shift count of 0 means the IMUL alone suffices and no SAR is emitted.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}
2701
  // Same as convert_long_double but leaves the two pushed words on the CPU
  // stack (no "add sp, 8"); the caller is responsible for popping them.
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}
2714
  // 32x32 -> 64-bit signed multiply of EAX by $src; result lands in EDX:EAX.
  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src   (F7 /5)
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}
2721
  // 32x32 -> 64-bit unsigned multiply of EAX by $src; result in EDX:EAX.
  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src   (F7 /4)
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}
2728
  // Full 64x64 -> 64-bit multiply with the product in EDX:EAX ($dst).
  // Cross terms that would land above bit 63 are discarded.
  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX   -- x_hi * y_lo
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX    -- x_lo * y_hi
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX   -- sum of the two cross terms
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL   EDX:EAX,$src.lo  -- unsigned x_lo * y_lo into EDX:EAX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,ESI    -- fold the cross terms into the high word
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}
2754
  // Multiply a 64-bit value in EDX:EAX by a small constant (0..127, so its
  // high word is zero and one cross term drops out).
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src   (6B = IMUL r32, r/m32, imm8)
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src   (load the constant for the widening MUL)
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,ESI   (fold the cross term into the high word)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}
2772
  // Long division via runtime call: push both 64-bit operands, call
  // SharedRuntime::ldiv, then pop the four argument words.
  // NOTE(review): HIGH_FROM_LOW(0x50+reg) relies on HIGH_FROM_LOW being a
  // plain encoding offset, making it equal to 0x50+HIGH_FROM_LOW(reg).
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}
2791
  // Long remainder via runtime call: identical frame setup to long_div but
  // calls SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}
2810
  // Compare a long against zero: OR the two halves together so the zero
  // flag reflects whether the whole 64-bit value is zero.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}
2819
  // Long equality compare: compare low words first and only compare the
  // high words when the low words were equal, so ZF ends up set iff the
  // full 64-bit values are equal.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip   (emit_cc builds the 0x75 short-jcc opcode)
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}
2831
  // Signed long compare via CMP low / SBB high: after the sequence the sign
  // and overflow flags reflect the full 64-bit signed ordering.  Clobbers
  // $tmp but leaves both sources intact.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB    $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}
2843
  // Signed compare of zero against a long (i.e. flags for 0 - $src):
  // zero $tmp, subtract the low word, then SBB the high word.
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}
2855
  // Sniff, sniff... smells like Gnu Superoptimizer
  // Two-word negate: NEG both halves, then SBB 0 from the high half to
  // account for the borrow the low-half NEG produces when lo != 0.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}
2866
  // POP EDX (opcode 0x5A).
  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);
  %}
2870
  // Tail-jump to the shared rethrow stub (never returns here).
  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}
2877
2878
  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware stores a flag value down
  // in the corner cases; a slow-path runtime call patches up the correct
  // Java result for those.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // thrown an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);            // POP EAX
    // 0x80000000 is the FISTP "invalid" marker for out-of-range/NaN inputs
    emit_opcode(cbuf,0x3D);            // CMP EAX,imm
    emit_d32   (cbuf,0x80000000);      //         0x80000000
    emit_opcode(cbuf,0x75);            // JNE around_slow_call
    emit_d8    (cbuf,0x07);            // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );           // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);            // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}
2926
  // Convert a double to a long: same round-to-zero / FISTP / slow-path
  // pattern as DPR2I_encoding, but stores 8 bytes and checks the 64-bit
  // invalid marker 0x8000000000000000 (hi == 0x80000000 && lo == 0).
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);            // POP EAX  (result lo)
    emit_opcode(cbuf,0x5A);            // POP EDX  (result hi)
    emit_opcode(cbuf,0x81);            // CMP EDX,imm
    emit_d8    (cbuf,0xFA);            // rdx
    emit_d32   (cbuf,0x80000000);      //         0x80000000
    emit_opcode(cbuf,0x75);            // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);          // Size of slow_call
    emit_opcode(cbuf,0x85);            // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);            // 2/rax,/rax,
    emit_opcode(cbuf,0x75);            // JNE around_slow_call
    emit_d8    (cbuf,0x07);            // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );           // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);            // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}
2968
  // Multiply the FPU top-of-stack by stack register $src1, result in ST.
  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}
2975
  // Add stack register $src2 into the FPU top-of-stack (no pop).
  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}
2982
  // Add top-of-stack into $src2 and pop the FPU stack.
  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}
2988
  // Fused subtract-then-divide on the FPU stack top:
  // ST = (ST - $src1) / $src2.
  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
      // FSUB   ST,$src1   (D8 E0+i)
      emit_opcode(cbuf, 0xD8);
      emit_opcode(cbuf, 0xE0 + $src1$$reg);

      // FDIV   ST,$src2   (D8 F0+i)
      emit_opcode(cbuf, 0xD8);
      emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}
2999
  // Fused add-then-multiply on the FPU stack top:
  // ST = (ST + $src1) * $src2, result stays in ST (no pop).
  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}
3010
3011
  // Like MulFAddF, but the final multiply pops: $src2 = $src2 * (ST + $src1).
  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}
3022
  // Atomically load the volatile long: FILD performs a single atomic 64-bit
  // read from memory onto the FPU stack, then FISTP stores it to the
  // destination stack slot.
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;   // /5 with 0xDF = FILD m64int
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    // FISTP (DF /7) the value into the 64-bit stack slot
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}
3035
  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    // FILD (DF /5) the 64-bit source stack slot onto the FPU stack
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;   // /7 with 0xDF = FISTP m64int (atomic 64-bit write)
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}
3052
3053 %}
3054
3055
3056 //----------FRAME--------------------------------------------------------------
3057 // Definition of frame structure and management information.
3058 //
3059 // S T A C K L A Y O U T Allocators stack-slot number
3060 // | (to get allocators register number
3061 // G Owned by | | v add OptoReg::stack0())
3062 // r CALLER | |
3063 // o | +--------+ pad to even-align allocators stack-slot
3064 // w V | pad0 | numbers; owned by CALLER
3065 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
3066 // h ^ | in | 5
3067 // | | args | 4 Holes in incoming args owned by SELF
3068 // | | | | 3
3069 // | | +--------+
3070 // V | | old out| Empty on Intel, window on Sparc
3071 // | old |preserve| Must be even aligned.
3072 // | SP-+--------+----> Matcher::_old_SP, even aligned
3073 // | | in | 3 area for Intel ret address
3074 // Owned by |preserve| Empty on Sparc.
3075 // SELF +--------+
3076 // | | pad2 | 2 pad to align old SP
3077 // | +--------+ 1
3078 // | | locks | 0
3079 // | +--------+----> OptoReg::stack0(), even aligned
3080 // | | pad1 | 11 pad to align new SP
3081 // | +--------+
3082 // | | | 10
3083 // | | spills | 9 spills
3084 // V | | 8 (pad0 slot for callee)
3085 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
3086 // ^ | out | 7
3087 // | | args | 6 Holes in outgoing args owned by CALLEE
3088 // Owned by +--------+
3089 // CALLEE | new out| 6 Empty on Intel, window on Sparc
3090 // | new |preserve| Must be even-aligned.
3091 // | SP-+--------+----> Matcher::_new_SP, even aligned
3092 // | | |
3093 //
3094 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
3095 // known from SELF's arguments and the Java calling convention.
3096 // Region 6-7 is determined per call site.
3097 // Note 2: If the calling convention leaves holes in the incoming argument
3098 // area, those holes are owned by SELF. Holes in the outgoing area
// are owned by the CALLEE. Holes should not be necessary in the
3100 // incoming area, as the Java calling convention is completely under
3101 // the control of the AD file. Doubles can be sorted and packed to
// avoid holes. Holes in the outgoing arguments may be necessary for
3103 // varargs C calling conventions.
3104 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
3105 // even aligned with pad0 as needed.
3106 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
3107 // region 6-11 is even aligned; it may be padded out more so that
3108 // the region from SP to FP meets the minimum stack alignment.
3109
frame %{
  // This register is part of the calling convention between compiled
  // code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of C & interpreter return values
  // Tables are indexed by ideal register number (Op_RegI..Op_RegL).
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // Same tables as above; note Java float results live in XMM0 already
  // at UseSSE>=1 (C results only at UseSSE>=2, per the C ABI).
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}
3176
3177 //----------ATTRIBUTES---------------------------------------------------------
3178 //----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction
3191
3192 //----------OPERANDS-----------------------------------------------------------
3193 // Operand definitions must precede instruction definitions for correct parsing
3194 // in the ADLC because operands constitute user defined types which are used in
3195 // instruction definitions.
3196
3197 //----------Simple Operands----------------------------------------------------
3198 // Immediate Operands
3199 // Integer Immediate
// Integer Immediate (any 32-bit int constant)
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3207
// Constant for test vs zero
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3217
// Constant for increment
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3227
// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3237
// Valid scale values for addressing modes (SIB scale field: 0..3)
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3246
// Signed 8-bit int immediate (fits an imm8 encoding)
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3255
// Unsigned 8-bit int immediate (0..255)
operand immU8() %{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3264
// Signed 16-bit int immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3273
// Int Immediate non-negative (fits in 31 bits)
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3284
// Constant for long shifts (exactly 32)
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3294
// Shift count within one 32-bit word (1..31)
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3303
// Long shift count that crosses the word boundary (32..63)
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}
3312
// Int immediate exactly 2
operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3321
// Int immediate exactly 3
operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3330
// Int immediate exactly 4
operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3340
// Int immediate exactly 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3350
// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3359
// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}
3369
// Long Immediate (any 64-bit long constant)
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
3378
// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}
3388
// Long Immediate -1 (all bits set)
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}
3398
// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}
3409
// Long Immediate: low 32-bit mask (0x00000000FFFFFFFF)
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}
3419
// Long Immediate that fits in 32 bits (value equals its sign-extended
// low word).  NOTE(review): the original header said "low 32-bit mask",
// which describes immL_32bits above, not this operand.
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}
3429
// Double Immediate zero (x87/FPU form, UseSSE<=1)
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3441
// Double Immediate one (x87/FPU form, UseSSE<=1)
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3451
// Double Immediate (x87/FPU form, UseSSE<=1)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3461
// Double Immediate (SSE2 form, UseSSE>=2)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3470
// Double Immediate zero (SSE2 form); the bit-pattern test matches +0.0 only
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}
3482
// Float Immediate zero (x87/FPU form, UseSSE == 0)
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3492
// Float Immediate one (x87/FPU form, UseSSE == 0)
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3502
// Float Immediate (x87/FPU form, UseSSE == 0)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3512
// Float Immediate (SSE form, UseSSE >= 1)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3522
// Float Immediate zero.  Zero and not -0.0 (bit-pattern test)
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3532
// Immediates for special shifts (sign extend)

// Shift count 16 (short sign-extension)
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3543
// Shift count 24 (byte sign-extension)
operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3551
// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3560
// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3569
// AVX-512 opmask register (any allocatable k-register)
operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}
3577
// Opmask register K1 only
operand kReg_K1()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K1));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}
3585
3586 operand kReg_K2()
3587 %{
3588 constraint(ALLOC_IN_RC(vectmask_reg_K2));
3589 match(RegVectMask);
3590 format %{%}
3591 interface(REG_INTER);
3592 %}
3593
3594 // Special Registers
3595 operand kReg_K3()
3596 %{
3597 constraint(ALLOC_IN_RC(vectmask_reg_K3));
3598 match(RegVectMask);
3599 format %{%}
3600 interface(REG_INTER);
3601 %}
3602
3603 operand kReg_K4()
3604 %{
3605 constraint(ALLOC_IN_RC(vectmask_reg_K4));
3606 match(RegVectMask);
3607 format %{%}
3608 interface(REG_INTER);
3609 %}
3610
3611 operand kReg_K5()
3612 %{
3613 constraint(ALLOC_IN_RC(vectmask_reg_K5));
3614 match(RegVectMask);
3615 format %{%}
3616 interface(REG_INTER);
3617 %}
3618
3619 operand kReg_K6()
3620 %{
3621 constraint(ALLOC_IN_RC(vectmask_reg_K6));
3622 match(RegVectMask);
3623 format %{%}
3624 interface(REG_INTER);
3625 %}
3626
3627 // Special Registers
3628 operand kReg_K7()
3629 %{
3630 constraint(ALLOC_IN_RC(vectmask_reg_K7));
3631 match(RegVectMask);
3632 format %{%}
3633 interface(REG_INTER);
3634 %}
3635
// Register Operands
// Integer Register: any allocatable 32-bit general-purpose register.
// The extra match() lines let the matcher accept the more-constrained
// operands below anywhere an rRegI is expected.
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register: restricted to EAX/EBX/ECX/EDX
// (the registers with encodable 8-bit sub-registers).
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}
3665
// Special Registers: integer operands pinned to one specific GPR.
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}
3712
// Integer register excluding EAX ("no-ax"); sub-operand matches cover
// ECX/EDX/ESI/EDI.
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding EAX and EDX ("no-a/dx").
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding ECX ("no-cx"); ECX is x86's implicit
// shift-count register.
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}
3748
// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
// Integer operand pinned to ESI.
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}
3759
// Pointer Register: matched against the widest pointer register class
// (any_reg).
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register: any allocatable general-purpose pointer register.
operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register under the rRegI-style naming convention; same register
// class (int_reg) and match set as eRegP.
operand rRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}
3797
// On windows95, EBP is not safe to use for implicit null tests.
// Pointer register class excluding EBP; the high op_cost(100) steers the
// allocator toward plain eRegP unless this restriction is required.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}
3811
// Pointer register excluding EAX ("no-ax").
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX and EBX ("no-a/bx").
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register in class p_reg; sub-operand matches cover
// EBX/EDX/ESI/EDI.
operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}
3848
// Special Registers: pointer operands pinned to one specific GPR.
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}
3895
// Long Register: an allocatable register pair for a 64-bit value.
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

// Long value pinned to the EDX:EAX pair (the pair used implicitly by
// x86 mul/div).
operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

// Long value pinned to the EBX:ECX pair.
operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply: allocated to the EDX:EAX pair
// but only the low half (EAX) is named in the format.
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}
3929
// Flags register, used as output of compare instructions.
// Note: rFlagsReg and eFlagsReg are the same register class; both naming
// conventions appear in this file.
operand rFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// Flags from FP compares that need no unordered fixup.  predicate(false)
// keeps the matcher from selecting this operand on its own; it is only
// used where an instruction names it explicitly.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}
3965
// Condition Code Register used by long compare.  The three variants tag the
// flags with which comparison flavor produced them (LT/GE, EQ/NE, LE/GT) so
// only matching branch/cmov forms can consume them.
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare; same three
// flavors as above.
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}
4005
// x87 (FPU-stack) double register operands; used when UseSSE < 2.
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

// Double pinned to x87 register FPR1.
operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// Double pinned to x87 register FPR2.
operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

// Double in an x87 register other than the first one
// (class fp_dbl_notreg0).
operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// x87 (FPU-stack) float register operands.
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float pinned to x87 register FPR1.
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}
4059
// XMM Float register operands (SSE).
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Legacy-encodable XMM float register; currently the same register class
// as regF (float_reg_legacy).
operand legRegF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operand drawn from the float_reg_vl class.
operand vlRegF() %{
  constraint(ALLOC_IN_RC(float_reg_vl));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands (SSE2).
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Legacy-encodable XMM double register; currently the same register class
// as regD (double_reg_legacy).
operand legRegD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double register operand drawn from the double_reg_vl class.
operand vlRegD() %{
  constraint(ALLOC_IN_RC(double_reg_vl));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}
4111
//----------Memory Operands----------------------------------------------------
// Direct Memory Operand: an absolute address taken from a pointer constant.
// base 0xFFFFFFFF encodes "no base register"; index 0x4 encodes "no index".
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}
4125
// Indirect Memory Operand: [reg], no index, no displacement.
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}
4139
// Indirect Memory Plus Short Offset Operand: [reg + imm8].
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand: [reg + imm32].
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand, reversed form: the pointer
// constant is the first AddP input and the integer register the second.
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}
4178
// Indirect Memory Plus Index Register Plus Offset Operand:
// [reg + ireg + imm32].
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand: [reg + ireg], no offset.
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}
4206
4207 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
4209 // // -------------------------------------------------------------------------
4210 // // Scaled Memory Operands
4211 // // Indirect Memory Times Scale Plus Offset Operand
4212 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4213 // match(AddP off (LShiftI ireg scale));
4214 //
4215 // op_cost(10);
4216 // format %{"[$off + $ireg << $scale]" %}
4217 // interface(MEMORY_INTER) %{
4218 // base(0x4);
4219 // index($ireg);
4220 // scale($scale);
4221 // disp($off);
4222 // %}
4223 // %}
4224
// Indirect Memory Times Scale Plus Index Register: [reg + ireg << scale].
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand:
// [reg + imm32 + ireg << scale] -- the full SIB addressing form.
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}
4252
4253 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
4255 // the first word of the long. If the load-long destination overlaps with
4256 // registers used in the addressing expression, the 2nd half will be loaded
4257 // from a clobbered address. Fix this by requiring that load-long use
4258 // address registers that do not overlap with the load-long target.
4259
// load-long support
// Base pointer for a load-long: pinned to ESI so the address register
// cannot overlap the destination register pair (see comment above);
// op_cost(100) discourages use outside that idiom.
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long: [ESI-based reg].
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand: [ESI-based reg + imm32].
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Memory operand class usable by load-long instructions.
opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4298
4299
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.  All are ESP-relative with the
//                      stack offset supplied by the sReg* operand.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
4363
4364 //----------Conditional Branch Operands----------------------------------------
4365 // Comparison Op - This is the operation of the comparison, and is limited to
4366 // the following set of codes:
4367 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4368 //
4369 // Other attributes of the comparison, such as unsignedness, are specified
4370 // by the comparison instruction that sets a condition code flags register.
4371 // That result is represented by a flags operand whose subtype is appropriate
4372 // to the unsignedness (etc.) of the comparison.
4373 //
4374 // Later, the instruction which matches both the Comparison Op (a Bool) and
4375 // the flags (produced by the Cmp) specifies the coding of the comparison op
4376 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4377
// Comparison Code
// Signed comparison; the hex values are x86 condition-code encodings
// (as used in Jcc/SETcc/CMOVcc), paired with their assembler mnemonics.
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
4394
// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
// Uses the below/above family of condition codes.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
4413
// Floating comparisons that don't require any fixup for the unordered case:
// only the lt/ge/le/gt tests are accepted (eq/ne need the extra jumps
// handled by cmpOpUCF2 below).
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
4433
4434
// Floating comparisons that can be fixed up with extra conditional jumps:
// restricted to the eq/ne tests.
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
4452
// Comparison Code for FP conditional move (FCMOVcc-style encodings rather
// than Jcc codes).  Overflow tests are excluded by the predicate since the
// instruction cannot express them.
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}
4471
// Comparison Code used in long compares, with the condition sense swapped
// (less maps to "g", greater to "l", etc.) for use where the compare
// operands have been commuted.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares; same commuted mapping
// using the unsigned (below/above) condition codes.
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
4505
4506 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.
4512
// All general-purpose memory addressing forms.
opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);
4521
4522
4523 //----------PIPELINE-----------------------------------------------------------
4524 // Rules which define the behavior of the target architectures pipeline.
4525 pipeline %{
4526
//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are of variable size
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
4538
//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline: six stages S0 (decode) .. S5.
pipe_desc(S0, S1, S2, S3, S4, S5);
4557
4558 //----------PIPELINE CLASSES---------------------------------------------------
4559 // Pipeline Classes describe the stages in which input and output are
4560 // referenced by the hardware pipeline.
4561
4562 // Naming convention: ialu or fpu
4563 // Then: _reg
4564 // Then: _reg if there is a 2nd register
4565 // Then: _long if it's a pair of instructions implementing a long
4566 // Then: _fat if it requires the big decoder
4567 // Or: _mem if it requires the big decoder and a memory unit.
4568
// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
  single_instruction;
  dst    : S4(write);
  dst    : S3(read);
  DECODE : S0;        // any decoder
  ALU    : S3;        // any alu
%}

// Long ALU reg operation: two instructions, one per 32-bit half.
pipe_class ialu_reg_long(eRegL dst) %{
  instruction_count(2);
  dst    : S4(write);
  dst    : S3(read);
  DECODE : S0(2);     // any 2 decoders
  ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
  single_instruction;
  dst    : S4(write);
  dst    : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
  instruction_count(2);
  dst    : S4(write);
  dst    : S3(read);
  D0     : S0(2);     // big decoder only; twice
  ALU    : S3(2);     // any 2 alus
%}
4604
// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  DECODE : S0;        // any decoder
  ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation: two instructions, one per 32-bit half.
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
  instruction_count(2);
  dst    : S4(write);
  src    : S3(read);
  DECODE : S0(2);     // any 2 decoders
  ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation needing the big decoder.
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation needing the big decoder.
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
  instruction_count(2);
  dst    : S4(write);
  src    : S3(read);
  D0     : S0(2);     // big decoder only; twice
  ALU    : S3(2);     // both alus
%}
4640
// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
  single_instruction;
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S4;        // any alu
  MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
  instruction_count(2);
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0(2);     // big decoder only; twice
  ALU    : S4(2);     // any 2 alus
  MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
  single_instruction;
  mem    : S3(read);
  D0     : S0;        // big decoder only
  MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
  single_instruction;
  mem    : S3(read);
  src    : S5(read);
  D0     : S0;        // big decoder only
  ALU    : S4;        // any alu
  MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
  instruction_count(2);
  mem    : S3(read);
  src    : S5(read);
  D0     : S0(2);     // big decoder only; twice
  ALU    : S4(2);     // any 2 alus
  MEM    : S3(2);     // Both mems
%}

// Integer Store of an immediate to Memory
pipe_class ialu_mem_imm(memory mem) %{
  single_instruction;
  mem    : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S4;        // any alu
  MEM    : S3;
%}
4698
// Integer ALU0 reg-reg operation (mul/div class ops: only ALU0 handles them).
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  D0     : S0;        // Big decoder only
  ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
  single_instruction;
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0;        // big decoder only
  ALU0   : S4;        // ALU0 only
  MEM    : S3;        // any mem
%}

// Flag-writing ALU reg-reg operation (e.g. compares).
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
  single_instruction;
  cr     : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  DECODE : S0;        // any decoder
  ALU    : S3;        // any alu
%}

// Flag-writing ALU reg-imm operation.
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
  single_instruction;
  cr     : S4(write);
  src1   : S3(read);
  DECODE : S0;        // any decoder
  ALU    : S3;        // any alu
%}

// Flag-writing ALU reg-mem operation.
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
  single_instruction;
  cr     : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S4;        // any alu
  MEM    : S3;
%}
4747
// Conditional move reg-reg: four-instruction compare/mask sequence.
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
  instruction_count(4);
  y      : S4(read);
  q      : S3(read);
  p      : S3(read);
  DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0;        // any decoder
  MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg (x87 FCMOV-style: destination is FPR1).
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0;        // any decoder
%}
4793
// Float reg operation, 2-instruction expansion.
// NOTE(review): the lone operand is named 'dst' but is declared as a
// read at S3 — looks intentional for the x87 stack discipline, but
// confirm against the instructs using this class.
pipe_class fpu_reg(regDPR dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation, 2-instruction expansion.
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg-reg operation, 3-instruction expansion.
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg-reg-reg operation, 4-instruction expansion.
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-mem-reg-reg operation: one memory-form op (big decoder)
// plus three register-form ops.
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}
4844
// Float reg-mem operation: load (big decoder) plus an FPU pop.
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-reg-mem operation.
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation: FPU push then a store through the big decoder.
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg-reg operation.
pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg-mem operation: two memory-form ops plus an FPU push.
pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}
4900
// Float mem-mem move: two memory-form ops, no FPU stage reserved.
pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

// Float mem-mem-mem operation: three memory-form ops.
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

// Float store of reg combined with a constant: FPU push plus two
// memory-form ops.
pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant: memory load plus an FPU pop.
pipe_class fpu_reg_con(regDPR dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float load constant combined with a register operand.
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
4949
// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR : S3;
%}

// Conditional branch: reads flags early (S1) so the predictor can resolve.
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr : S1(read);
    BR : S3;
%}

// Allocation idiom (compare-and-exchange): serializing, fixed 6-cycle
// latency; uses decode, memory, ALU and branch resources.
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}

// Generic big/slow expanded idiom: catch-all class for long expansions;
// the large fixed latency keeps the scheduler from packing around it.
pipe_class pipe_slow( ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}

// The real do-nothing guy: zero instructions, no resources.
pipe_class empty( ) %{
    instruction_count(0);
%}
4988
// Define the class for the Nop node: MachNop uses the zero-instruction
// 'empty' pipe class above.
define %{
MachNop = empty;
%}
4993
4994 %}
4995
4996 //----------INSTRUCTIONS-------------------------------------------------------
4997 //
4998 // match -- States which machine-independent subtree may be replaced
4999 // by this instruction.
5000 // ins_cost -- The estimated cost of this instruction is used by instruction
5001 // selection to identify a minimum cost tree of machine
5002 // instructions that matches a tree of machine-independent
5003 // instructions.
5004 // format -- A string providing the disassembly for this instruction.
5005 // The value of an instruction's operand may be inserted
5006 // by referring to it with a '$' prefix.
5007 // opcode -- Three instruction opcodes may be provided. These are referred
5008 // to within an encode class as $primary, $secondary, and $tertiary
5009 // respectively. The primary opcode is commonly used to
5010 // indicate the type of machine instruction, while secondary
5011 // and tertiary are often used for prefix options or addressing
5012 // modes.
5013 // ins_encode -- A list of encode classes with parameters. The encode class
5014 // name must have been defined in an 'enc_class' specification
5015 // in the encode section of the architecture description.
5016
5017 //----------BSWAP-Instruction--------------------------------------------------
// Reverse the four bytes of an int in place with a single BSWAP.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Reverse the eight bytes of a long: byte-swap each 32-bit half of the
// register pair, then exchange the halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG  $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}
5038
// Reverse the two bytes of an unsigned short, zero-extending into the
// full register: BSWAP moves the byte-swapped low word into the high
// word, then a logical shift right by 16 brings it back down with zero
// fill. SHR writes the condition codes, hence KILL cr.
// Fix: the format string previously ended with a stray "\n\t" after the
// last instruction, which emitted a dangling indented blank line in
// -XX:+PrintOptoAssembly output.
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}
5051
// Reverse the two bytes of a signed short, sign-extending into the full
// register: BSWAP moves the byte-swapped low word into the high word,
// then an arithmetic shift right by 16 brings it back down replicating
// the sign bit. SAR writes the condition codes, hence KILL cr.
// Fix: the format string previously ended with a stray "\n\t" after the
// last instruction, which emitted a dangling indented blank line in
// -XX:+PrintOptoAssembly output.
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}
5064
5065
5066 //---------- Zeros Count Instructions ------------------------------------------
5067
// Count leading zeros of an int with the LZCNT instruction (requires
// hardware support, gated by UseCountLeadingZerosInstruction).
// LZCNT writes the condition codes, hence KILL cr.
instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Count leading zeros of an int without LZCNT: BSR yields the index of
// the highest set bit (undefined on zero input, so force -1 there);
// then leading-zeros = 31 - index = (-index) + 31.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ    skip\n\t"
            "MOV    $dst, -1\n"
      "skip:\n\t"
            "NEG    $dst\n\t"
            "ADD    $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);           // BSR left dst undefined: src was zero
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);  // 31 - bit_index
  %}
  ins_pipe(ialu_reg);
%}

// Count leading zeros of a long with LZCNT: try the high half first;
// LZCNT sets CF when its input is zero, so JNC means the high half had
// a set bit and its count is the answer. Otherwise count the low half
// and add 32. HIGH_FROM_LOW(r) is the register holding the high 32 bits
// of the long pair ($src.hi in the format).
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC    done\n\t"
            "LZCNT  $dst, $src.lo\n\t"
            "ADD    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);  // CF clear => high half non-zero
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);             // low-half count + 32
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}
5127
// Count leading zeros of a long without LZCNT. BSR the high half; if
// non-zero its bit index plus 32 is the overall highest-bit index. If
// the high half is zero, BSR the low half; if both halves are zero,
// force the index to -1. Finally leading-zeros = 63 - index.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ     msw_is_zero\n\t"
            "ADD    $dst, 32\n\t"
            "JMP    not_zero\n"
      "msw_is_zero:\n\t"
            "BSR    $dst, $src.lo\n\t"
            "JNZ    not_zero\n\t"
            "MOV    $dst, -1\n"
      "not_zero:\n\t"
            "NEG    $dst\n\t"
            "ADD    $dst, 63\n" %}
 ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));  // high 32 bits of the pair
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);           // index is relative to bit 32
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);                   // whole long was zero
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);      // 63 - bit_index
  %}
  ins_pipe(ialu_reg);
%}
5163
// Count trailing zeros of an int with the TZCNT instruction (gated by
// UseCountTrailingZerosInstruction). TZCNT writes flags, hence KILL cr.
instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros of an int without TZCNT: BSF yields the index of
// the lowest set bit but is undefined on zero input, so substitute 32
// when the source is zero.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ    done\n\t"
            "MOV    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);   // src was zero: result is 32
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros of a long with TZCNT: try the low half first;
// TZCNT sets CF when its input is zero, so JNC means the low half had a
// set bit. Otherwise count the high half and add 32. HIGH_FROM_LOW(r)
// is the register holding the high 32 bits of the pair ($src.hi).
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC    done\n\t"
            "TZCNT  $dst, $src.hi\n\t"
            "ADD    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);  // CF clear => low half non-zero
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);             // high-half count + 32
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}
5218
// Count trailing zeros of a long without TZCNT. BSF the low half; if it
// is zero, BSF the high half (forcing 32 if that is zero too — i.e. the
// whole long was zero, giving 32+32=64) and add 32 to make the index
// relative to bit 0 of the full 64-bit value.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ    done\n\t"
            "BSF    $dst, $src.hi\n\t"
            "JNZ    msw_not_zero\n\t"
            "MOV    $dst, 32\n"
      "msw_not_zero:\n\t"
            "ADD    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));  // high 32 bits of the pair
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);           // both halves zero: 32 + 32 = 64
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);           // index relative to bit 32
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}
5248
5249
5250 //---------- Population Count Instructions -------------------------------------
5251
// Population count of an int (POPCNT, gated by UsePopCountInstruction).
// POPCNT writes the condition codes, hence KILL cr.
instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Population count of an int loaded directly from memory.
instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Sum the POPCNTs of the two 32-bit halves of the register pair.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Memory form: POPCNT each 32-bit word of the long (at $mem and $mem+4)
// and sum the counts.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}
5311
5312
5313 //----------Load/Store/Move Instructions---------------------------------------
5314 //----------Load Instructions--------------------------------------------------
5315 // Load Byte (8bit signed)
// Load Byte (8bit signed): sign-extending byte load (MOVSX).
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register: sign-extend the byte into
// the low word, then derive the high word by arithmetic shift.
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned): zero-extending byte load (MOVZX).
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register: zero-extend
// the byte, high word is simply zero.
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register:
// only the low 8 bits of the mask can matter after a zero-extending
// byte load, so AND with (mask & 0xFF).
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}
5396
5397 // Load Short (16bit signed)
5398 instruct loadS(rRegI dst, memory mem) %{
5399 match(Set dst (LoadS mem));
5400
5401 ins_cost(125);
5402 format %{ "MOVSX $dst,$mem\t# short" %}
5403
5404 ins_encode %{
5405 __ movswl($dst$$Register, $mem$$Address);
5406 %}
5407
5408 ins_pipe(ialu_reg_mem);
5409 %}
5410
5411 // Load Short (16 bit signed) to Byte (8 bit signed)
5412 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5413 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5414
5415 ins_cost(125);
5416 format %{ "MOVSX $dst, $mem\t# short -> byte" %}
5417 ins_encode %{
5418 __ movsbl($dst$$Register, $mem$$Address);
5419 %}
5420 ins_pipe(ialu_reg_mem);
5421 %}
5422
5423 // Load Short (16bit signed) into Long Register
5424 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5425 match(Set dst (ConvI2L (LoadS mem)));
5426 effect(KILL cr);
5427
5428 ins_cost(375);
5429 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
5430 "MOV $dst.hi,$dst.lo\n\t"
5431 "SAR $dst.hi,15" %}
5432
5433 ins_encode %{
5434 __ movswl($dst$$Register, $mem$$Address);
5435 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5436 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
5437 %}
5438
5439 ins_pipe(ialu_reg_mem);
5440 %}
5441
5442 // Load Unsigned Short/Char (16bit unsigned)
5443 instruct loadUS(rRegI dst, memory mem) %{
5444 match(Set dst (LoadUS mem));
5445
5446 ins_cost(125);
5447 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}
5448
5449 ins_encode %{
5450 __ movzwl($dst$$Register, $mem$$Address);
5451 %}
5452
5453 ins_pipe(ialu_reg_mem);
5454 %}
5455
5456 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5457 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5458 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5459
5460 ins_cost(125);
5461 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
5462 ins_encode %{
5463 __ movsbl($dst$$Register, $mem$$Address);
5464 %}
5465 ins_pipe(ialu_reg_mem);
5466 %}
5467
5468 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5469 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5470 match(Set dst (ConvI2L (LoadUS mem)));
5471 effect(KILL cr);
5472
5473 ins_cost(250);
5474 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
5475 "XOR $dst.hi,$dst.hi" %}
5476
5477 ins_encode %{
5478 __ movzwl($dst$$Register, $mem$$Address);
5479 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5480 %}
5481
5482 ins_pipe(ialu_reg_mem);
5483 %}
5484
5485 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5486 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5487 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5488 effect(KILL cr);
5489
5490 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5491 "XOR $dst.hi,$dst.hi" %}
5492 ins_encode %{
5493 Register Rdst = $dst$$Register;
5494 __ movzbl(Rdst, $mem$$Address);
5495 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5496 %}
5497 ins_pipe(ialu_reg_mem);
5498 %}
5499
5500 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5501 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5502 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5503 effect(KILL cr);
5504
5505 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5506 "XOR $dst.hi,$dst.hi\n\t"
5507 "AND $dst.lo,right_n_bits($mask, 16)" %}
5508 ins_encode %{
5509 Register Rdst = $dst$$Register;
5510 __ movzwl(Rdst, $mem$$Address);
5511 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5512 __ andl(Rdst, $mask$$constant & right_n_bits(16));
5513 %}
5514 ins_pipe(ialu_reg_mem);
5515 %}
5516
5517 // Load Integer
5518 instruct loadI(rRegI dst, memory mem) %{
5519 match(Set dst (LoadI mem));
5520
5521 ins_cost(125);
5522 format %{ "MOV $dst,$mem\t# int" %}
5523
5524 ins_encode %{
5525 __ movl($dst$$Register, $mem$$Address);
5526 %}
5527
5528 ins_pipe(ialu_reg_mem);
5529 %}
5530
5531 // Load Integer (32 bit signed) to Byte (8 bit signed)
5532 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5533 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5534
5535 ins_cost(125);
5536 format %{ "MOVSX $dst, $mem\t# int -> byte" %}
5537 ins_encode %{
5538 __ movsbl($dst$$Register, $mem$$Address);
5539 %}
5540 ins_pipe(ialu_reg_mem);
5541 %}
5542
5543 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5544 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5545 match(Set dst (AndI (LoadI mem) mask));
5546
5547 ins_cost(125);
5548 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
5549 ins_encode %{
5550 __ movzbl($dst$$Register, $mem$$Address);
5551 %}
5552 ins_pipe(ialu_reg_mem);
5553 %}
5554
5555 // Load Integer (32 bit signed) to Short (16 bit signed)
5556 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5557 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5558
5559 ins_cost(125);
5560 format %{ "MOVSX $dst, $mem\t# int -> short" %}
5561 ins_encode %{
5562 __ movswl($dst$$Register, $mem$$Address);
5563 %}
5564 ins_pipe(ialu_reg_mem);
5565 %}
5566
5567 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5568 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5569 match(Set dst (AndI (LoadI mem) mask));
5570
5571 ins_cost(125);
5572 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
5573 ins_encode %{
5574 __ movzwl($dst$$Register, $mem$$Address);
5575 %}
5576 ins_pipe(ialu_reg_mem);
5577 %}
5578
5579 // Load Integer into Long Register
5580 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5581 match(Set dst (ConvI2L (LoadI mem)));
5582 effect(KILL cr);
5583
5584 ins_cost(375);
5585 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
5586 "MOV $dst.hi,$dst.lo\n\t"
5587 "SAR $dst.hi,31" %}
5588
5589 ins_encode %{
5590 __ movl($dst$$Register, $mem$$Address);
5591 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5592 __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5593 %}
5594
5595 ins_pipe(ialu_reg_mem);
5596 %}
5597
5598 // Load Integer with mask 0xFF into Long Register
5599 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5600 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5601 effect(KILL cr);
5602
5603 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5604 "XOR $dst.hi,$dst.hi" %}
5605 ins_encode %{
5606 Register Rdst = $dst$$Register;
5607 __ movzbl(Rdst, $mem$$Address);
5608 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5609 %}
5610 ins_pipe(ialu_reg_mem);
5611 %}
5612
5613 // Load Integer with mask 0xFFFF into Long Register
5614 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5615 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5616 effect(KILL cr);
5617
5618 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5619 "XOR $dst.hi,$dst.hi" %}
5620 ins_encode %{
5621 Register Rdst = $dst$$Register;
5622 __ movzwl(Rdst, $mem$$Address);
5623 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5624 %}
5625 ins_pipe(ialu_reg_mem);
5626 %}
5627
5628 // Load Integer with 31-bit mask into Long Register
5629 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5630 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5631 effect(KILL cr);
5632
5633 format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5634 "XOR $dst.hi,$dst.hi\n\t"
5635 "AND $dst.lo,$mask" %}
5636 ins_encode %{
5637 Register Rdst = $dst$$Register;
5638 __ movl(Rdst, $mem$$Address);
5639 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5640 __ andl(Rdst, $mask$$constant);
5641 %}
5642 ins_pipe(ialu_reg_mem);
5643 %}
5644
5645 // Load Unsigned Integer into Long Register
5646 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5647 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5648 effect(KILL cr);
5649
5650 ins_cost(250);
5651 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
5652 "XOR $dst.hi,$dst.hi" %}
5653
5654 ins_encode %{
5655 __ movl($dst$$Register, $mem$$Address);
5656 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5657 %}
5658
5659 ins_pipe(ialu_reg_mem);
5660 %}
5661
// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI. Non-atomic: two separate 32-bit loads at $mem and
// $mem+4, only legal when the node does not require atomic access.
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
            "MOV    $dst.hi,$mem+4" %}

  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side. x87 path, used when SSE2 is not available (UseSSE<=1).
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
            "FISTp  $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load via a single 64-bit SSE2 load, bounced
// through an XMM temp into a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD  $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load directly into a GPR pair: one 64-bit SSE2
// load, then extract low word, shift the XMM right 32 bits, extract
// high word.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD   $dst.lo,$tmp\n\t"
            "PSRLQ  $tmp,32\n\t"
            "MOVD   $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
5727
// Load Range (array length): plain 32-bit MOV.
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer: 32-bit MOV (pointers are 32 bits on this target).
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer: same encoding as a plain pointer load.
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}
5761
// Move float between the general XMM class and the legacy-encodable
// (non-extended) XMM class; the register allocator may coalesce the
// move when src == dst.
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Reverse direction: legacy XMM class back to the general class.
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move double into the legacy-encodable XMM class.
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move double out of the legacy-encodable XMM class.
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
5801
// Load Double onto the x87 stack (FLD_D) and pop it into the target
// stack register; used only when SSE2 is unavailable.
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D  ST,$mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM with MOVSD, which also clears the upper half of
// the XMM register (preferred when UseXmmLoadAndClearUpper is set).
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD  $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Double to XMM without clearing the upper half (MOVLPD form in
// the disassembly; same movdbl entry point in the assembler).
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS  $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float onto the x87 stack (FLD_S) and pop into the target stack
// register; used only when SSE is disabled entirely.
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S  ST,$mem\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
5865
// Load Effective Address: one LEA variant per addressing-mode operand
// class, all sharing opcode 0x8D.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// LEA with a 32-bit displacement.
instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// LEA with base + index + offset.
instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// LEA with base + scaled index.
instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// LEA with base + scaled index + offset.
instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}
5916
// Load Constant (32-bit integer immediate into register)
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV    $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero -- XOR reg,reg is shorter than MOV reg,0 but
// clobbers the condition codes, hence the KILL cr effect.
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR    $dst,$dst" %}
  opcode(0x33);  /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

// Load pointer constant.
instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV    $dst,$src" %}
  opcode(0xB8);  /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant: two 32-bit immediate moves (lo then hi half).
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV    $dst.lo,$src.lo\n\t"
            "MOV    $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load long zero: XOR both register halves (clobbers flags).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR    $dst.lo,$dst.lo\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}
5968
// Load float constant into an x87 register from the constant table.
// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// Load float 0.0 via FLDZ -- no constant-table access needed.
// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// Load float 1.0 via FLD1 -- no constant-table access needed.
// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}
6007
// Load float constant into an XMM register from the constant table.
// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Load float 0.0 by zeroing the XMM register (XORPS reg,reg).
// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS  $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
6029
// Load double constant into an x87 register from the constant table.
// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// Load double 0.0 via FLDZ -- no constant-table access needed.
// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// Load double 1.0 via FLD1 -- no constant-table access needed.
// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}
6071
// Load double constant into an XMM register from the constant table.
// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Load double 0.0 by zeroing the XMM register (XORPD reg,reg).
// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD  $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
6093
// Load Stack Slot: move a spilled value from its stack slot into a register.

// Load int from stack slot.
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV    $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load long from stack slot: two 32-bit moves (lo, then hi at +4).
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load pointer from stack slot.
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV    $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load float from stack slot into x87 register.
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S  $src\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load double from stack slot into x87 register.
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D  $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
6152
// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// The AllocatePrefetchInstr flag selects the prefetch flavor:
//   0 = PREFETCHNTA, 1 = PREFETCHT0, 2 = PREFETCHT2, 3 = PREFETCHW.

// Without SSE (and not PREFETCHW) there is no usable prefetch
// instruction, so emit nothing.
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
6213
//----------Store Instructions-------------------------------------------------

// Store Byte.  Note: src is xRegI (a byte-addressable register --
// EAX/EBX/ECX/EDX) since 32-bit x86 cannot store the low byte of
// ESI/EDI/EBP/ESP.
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8   $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short: 16-bit store via the 0x66 operand-size prefix.
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16  $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV    $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}
6248
// Store Long as two 32-bit stores (lo, then hi at +4).  Not atomic, so it
// is excluded when the StoreL node requires atomic access (the volatile
// forms below handle that case).
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV    $mem,$src.lo\n\t"
            "MOV    $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer: a StoreI of (ConvL2I src) only needs the low
// 32-bit half of the long, so store just $src.lo.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV    $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
6272
// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD   $src\n\t"
            "FISTp  $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant of the atomic volatile long store: bounce the 64-bit value
// through an XMM register (one MOVSD load from the stack slot, one MOVSD
// store -- each 8-byte XMM access is atomic).  The CMP probes the target
// address first so an implicit null check can fault before any store.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD  $tmp,$src\n\t"
            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// As above but the long is already in a GPR pair: pack lo/hi halves into
// one XMM register via two MOVDs and a PUNPCKLDQ, then store atomically.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD   $tmp,$src.lo\n\t"
            "MOVD   $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
6325
// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV    $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate (MOV m32, imm32)
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV    $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate.  Guarded by UseStoreImmI16 because a 16-bit
// immediate store is slow on some CPUs.
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16  $mem,$src" %}
  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV    $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate (MOV m8, imm8)
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8   $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate (byte store used for GC card marking)
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}
6393
// Store Double from the x87 top-of-stack register (non-SSE2 path).
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D  $mem,$src" %}
  opcode(0xDD);       /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86: the FST_D store itself narrows the
// 80-bit x87 value to 64 bits, so RoundDouble folds into the store.
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D  $mem,$src\t# round" %}
  opcode(0xDD);       /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD  $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
6430
// Move double between the scalar (regD) and vector-leg (vlRegD) register
// classes.  Both map to XMM registers, so this is a reg-reg MOVSD.
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Reverse direction: vector-leg register class back to scalar.
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
6450
// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS  $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Move float between the scalar (regF) and vector-leg (vlRegF) register
// classes.  Both map to XMM registers, so this is a reg-reg MOVSS.
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Reverse direction: vector-leg register class back to scalar.
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
6483
// Store Float from the x87 top-of-stack register (no-SSE path).
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S  $mem,$src" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86: FST_S narrows the 80-bit x87 value
// to 32 bits, so RoundFloat folds into the store.
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S  $mem,$src\t# round" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86: a double-to-float conversion feeding
// a float store folds into the single narrowing FST_S.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S  $mem,$src\t# D-round" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV    $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV    $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}
6543
// Store Integer to stack slot (spill)
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV    $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot (spill)
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV    $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot (spill): two 32-bit stores, lo then hi at +4.
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}
6577
//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors.  On x86's TSO memory model most barriers need no
// instruction at all -- they exist only to constrain compiler scheduling --
// so they encode to zero bytes.  Only StoreLoad (membar_volatile) needs a
// real serializing instruction.

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// Acquire after a lock: the preceding CMPXCHG in FastLock already
// serializes, so nothing to emit.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// StoreLoad barrier: implemented as a locked add of 0 to the top of stack
// (cheaper than MFENCE on most CPUs).  Clobbers flags, hence KILL cr.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// A MemBarVolatile immediately preceded by a suitable locked instruction
// (detected by Matcher::post_store_load_barrier) needs no extra encoding.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
6659
//----------Move Instructions--------------------------------------------------

// CastX2P (word -> pointer): constrained to the same physical register
// (EAX) for both operands, so no code is emitted.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P  $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

// CastP2X (pointer -> word): a plain register copy.
instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV    $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}
6676
//----------Conditional Move---------------------------------------------------

// Conditional move emulated with a short branch over a MOV, for CPUs
// without the CMOV instruction (pre-P6).  Signed-compare flavor.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV    $dst,$src\n"
      "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// As above for unsigned comparisons.
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV    $dst,$src\n"
      "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Real CMOV (0F 40+cc), signed compare.
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Real CMOV, unsigned compare.
instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned-compare-without-flags-fixup flavor: expands to cmovI_regU.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move with memory source (folds the LoadI into the CMOV).
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move with memory source, unsigned compare.
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
6772
// Conditional move of a pointer, signed compare (CMOV form).
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note:  a CMoveP is generated for  stubs and native wrappers
//        regardless of whether we are on a P6, so we
//        emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop   skip\n\t"
          "MOV    $dst,$src\t# pointer\n"
      "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move of a pointer, unsigned compare.
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned-compare-without-flags-fixup flavor: expands to cmovP_regU.
instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
6818
6819 // DISABLED: Requires the ADLC to emit a bottom_type call that
6820 // correctly meets the two pointer arguments; one is an incoming
6821 // register but the other is a memory operand. ALSO appears to
6822 // be buggy with implicit null checks.
6823 //
6824 //// Conditional move
6825 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6826 // predicate(VM_Version::supports_cmov() );
6827 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6828 // ins_cost(250);
6829 // format %{ "CMOV$cop $dst,$src\t# ptr" %}
6830 // opcode(0x0F,0x40);
6831 // ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6832 // ins_pipe( pipe_cmov_mem );
6833 //%}
6834 //
6835 //// Conditional move
6836 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6837 // predicate(VM_Version::supports_cmov() );
6838 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6839 // ins_cost(250);
6840 // format %{ "CMOV$cop $dst,$src\t# ptr" %}
6841 // opcode(0x0F,0x40);
6842 // ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6843 // ins_pipe( pipe_cmov_mem );
6844 //%}
6845
// Conditional move of an x87 double via the FCMOV instruction.
// FCMOV only consumes the unsigned/unordered condition codes, so this
// form handles unsigned compares directly.
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// As above for x87 floats (same FCMOV encoding; size lives in the regs).
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// So emulate with a conditional branch over an FPU register copy.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOV    $dst,$src\t# double\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop    skip\n\t"
            "MOV    $dst,$src\t# float\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}
6893
// No CMOVE with SSE/SSE2: there is no XMM conditional move, so branch
// over a MOVSS register copy instead.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSS  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}
6911
// No CMOVE with SSE/SSE2: there is no XMM conditional move, so branch
// over a MOVSD register copy instead.
// Fix: the format comment previously said "# float" although this
// instruct moves a double (CMoveD / MOVSD).
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}
6929
// unsigned version
// Same branch-around-MOVSS emulation as the signed form, but matching the
// unsigned condition-code operand/flags register pair.
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// Carry-flag variant: delegates to the unsigned encoding above via expand.
instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}
6956
// unsigned version
// Branch-around-MOVSD emulation of CMoveD for the unsigned condition-code
// operand/flags register pair (no XMM conditional move exists).
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  // Fixed debug text: this is a double (MOVSD/CMoveD), not a float.
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}
6974
// Carry-flag variant: delegates to the unsigned double encoding via expand.
instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Conditional move of a long: two CMOVcc instructions, one per 32-bit half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-condition variant of the long conditional move.
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Carry-flag variant: delegates to the unsigned long cmov via expand.
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
7014
//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions

// Register + register add; ADD sets the condition codes, hence KILL cr.
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Register + immediate add; OpcSErm/Con8or32 pick the sign-extended 8-bit
// or full 32-bit immediate encoding as appropriate.
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of constant +1 as a one-byte INC when UseIncDec is enabled.
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: leaves the flags untouched (no KILL cr).
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer + immediate via LEA; flags preserved.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add of constant -1 as a one-byte DEC when UseIncDec is enabled.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Pointer + integer register add.
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer + immediate add.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}
7105
// Register += memory add (load folded into ADD).
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Memory += register: read-modify-write ADD directly on the memory operand.
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Memory += 1 as an in-place INC.
instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// Memory += -1 as an in-place DEC.
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}
7161
7162
7163 instruct checkCastPP( eRegP dst ) %{
7164 match(Set dst (CheckCastPP dst));
7165
7166 size(0);
7167 format %{ "#checkcastPP of $dst" %}
7168 ins_encode( /*empty encoding*/ );
7169 ins_pipe( empty );
7170 %}
7171
7172 instruct castPP( eRegP dst ) %{
7173 match(Set dst (CastPP dst));
7174 format %{ "#castPP of $dst" %}
7175 ins_encode( /*empty encoding*/ );
7176 ins_pipe( empty );
7177 %}
7178
7179 instruct castII( rRegI dst ) %{
7180 match(Set dst (CastII dst));
7181 format %{ "#castII of $dst" %}
7182 ins_encode( /*empty encoding*/ );
7183 ins_cost(0);
7184 ins_pipe( empty );
7185 %}
7186
7187 instruct castLL( eRegL dst ) %{
7188 match(Set dst (CastLL dst));
7189 format %{ "#castLL of $dst" %}
7190 ins_encode( /*empty encoding*/ );
7191 ins_cost(0);
7192 ins_pipe( empty );
7193 %}
7194
7195 instruct castFF( regF dst ) %{
7196 predicate(UseSSE >= 1);
7197 match(Set dst (CastFF dst));
7198 format %{ "#castFF of $dst" %}
7199 ins_encode( /*empty encoding*/ );
7200 ins_cost(0);
7201 ins_pipe( empty );
7202 %}
7203
7204 instruct castDD( regD dst ) %{
7205 predicate(UseSSE >= 2);
7206 match(Set dst (CastDD dst));
7207 format %{ "#castDD of $dst" %}
7208 ins_encode( /*empty encoding*/ );
7209 ins_cost(0);
7210 ins_pipe( empty );
7211 %}
7212
7213 instruct castFF_PR( regFPR dst ) %{
7214 predicate(UseSSE < 1);
7215 match(Set dst (CastFF dst));
7216 format %{ "#castFF of $dst" %}
7217 ins_encode( /*empty encoding*/ );
7218 ins_cost(0);
7219 ins_pipe( empty );
7220 %}
7221
7222 instruct castDD_PR( regDPR dst ) %{
7223 predicate(UseSSE < 2);
7224 match(Set dst (CastDD dst));
7225 format %{ "#castDD of $dst" %}
7226 ins_encode( /*empty encoding*/ );
7227 ins_cost(0);
7228 ins_pipe( empty );
7229 %}
7230
// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}
7285
// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// CompareAndSwapL: LOCK CMPXCHG8B, then convert ZF into the boolean result.
// Requires CX8 hardware support; oldval (EDX:EAX) is clobbered by the
// instruction's loaded-value side effect.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndSwapP: 32-bit pointer CAS via LOCK CMPXCHG; result from ZF.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte-wide CAS (CMPXCHGB).
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Short-wide CAS (CMPXCHGW).
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int-wide CAS (CMPXCHG).
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndExchange variants: like CAS but the result is the value found in
// memory, which CMPXCHG leaves in oldval's register(s) — so oldval is the def
// rather than being killed, and no boolean conversion is emitted.

instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}
7397
// GetAndAdd whose result is unused: a plain LOCK ADD suffices (no XADD).
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Short GetAndAdd, result unused: LOCK ADD (word).
instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDS [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Short GetAndAdd with result: LOCK XADD (word).
instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDS [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Int GetAndAdd, result unused: LOCK ADD (dword).
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Int GetAndAdd with result: LOCK XADD (dword).
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndSet via XCHG; XCHG with a memory operand is implicitly locked,
// so no explicit lock prefix is emitted and no flags are killed.

// Important to match to xRegI: only 8-bit regs.
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Pointer GetAndSet: same dword XCHG, pointer-typed operands.
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
7504
//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Register - immediate; sign-extended 8-bit or 32-bit immediate encoding.
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Register -= memory (load folded into SUB).
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Memory -= register: read-modify-write SUB on the memory operand.
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
// Matches ptr + (0 - src) and emits a direct SUB.
instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// 0 - dst becomes a single NEG.
instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}
7574
//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
// Three-operand IMUL: dst = src * imm.
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit-representable long constant into EAX only; exists solely to
// feed the multiply-high instructs below, which consume just the low word.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
// The predicate walks the matched subtree to confirm the multiplier is a
// long constant that fits in 32 bits.
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// General shift-count variant (32..63): needs an extra SAR on EDX.
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// dst*src1 + src2*src3 expanded into two IMULs and an ADD.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
            mulI_eReg(src2, src3, cr);
            addI_eReg(dst, src2, cr); %}
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned widening multiply: both inputs zero-extended by the AndL masks.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}
7704
// Multiply Register Long
// Full 64x64->64 multiply decomposed into 32-bit pieces (see the
// lo/hi identities in the comments below).
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
// Cheaper: one cross term drops out because x_hi == 0.
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
// Mirror image of the case above (y_hi == 0).
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
// Degenerates to a single unsigned 32x32->64 MUL.
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}
7795
7796 // Integer DIV with Register
7797 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7798 match(Set rax (DivI rax div));
7799 effect(KILL rdx, KILL cr);
7800 size(26);
7801 ins_cost(30*100+10*100);
7802 format %{ "CMP EAX,0x80000000\n\t"
7803 "JNE,s normal\n\t"
7804 "XOR EDX,EDX\n\t"
7805 "CMP ECX,-1\n\t"
7806 "JE,s done\n"
7807 "normal: CDQ\n\t"
7808 "IDIV $div\n\t"
7809 "done:" %}
7810 opcode(0xF7, 0x7); /* Opcode F7 /7 */
7811 ins_encode( cdq_enc, OpcP, RegOpc(div) );
7812 ins_pipe( ialu_reg_reg_alu0 );
7813 %}
7814
7815 // Divide Register Long
7816 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
7817 match(Set dst (DivL src1 src2));
7818 effect(CALL);
7819 ins_cost(10000);
7820 format %{ "PUSH $src1.hi\n\t"
7821 "PUSH $src1.lo\n\t"
7822 "PUSH $src2.hi\n\t"
7823 "PUSH $src2.lo\n\t"
7824 "CALL SharedRuntime::ldiv\n\t"
7825 "ADD ESP,16" %}
7826 ins_encode( long_div(src1,src2) );
7827 ins_pipe( pipe_slow );
7828 %}
7829
// Integer DIVMOD with Register, both quotient and mod results
// Fused divide+remainder: one IDIV leaves the quotient in EAX and the
// remainder in EDX, so matching the DivModI node saves a second divide.
// Same min_jint / -1 guard as divI_eReg (emitted by cdq_enc).
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}
7848
// Integer MOD with Register
// Signed 32-bit remainder: EAX % ECX left in EDX, EAX killed.
// The format shows only CDQ/IDIV, but the shared cdq_enc encoding also
// emits the min_jint / -1 guard — note size(26) matches divI_eReg above.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}
7862
// Remainder Register Long
// General 64-bit signed remainder via runtime call to SharedRuntime::lrem;
// mirrors divL_eReg above (same calling sequence, different helper).
instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (ModL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}
7877
// Divide Register Long (no special case since divisor != -1)
// Divides the 64-bit value in EDX:EAX by a 32-bit immediate using at most
// two unsigned 32-bit DIVs (high half first, then low half with the high
// remainder carried in EDX).  A negative dividend is negated up front so
// unsigned division applies, and the 64-bit quotient is re-negated at the
// end; if the divisor itself is negative the final NEG fixes the sign.
// The matcher guarantees the divisor is not 0, -1 or min_jint (asserted).
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;  // |imm|: divide is done unsigned
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    // If the (unsigned) high word is already below the divisor, one DIV of
    // the full value by $tmp yields a 32-bit quotient — take the fast path.
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    // Slow path: save lo, move hi into EAX, zero EDX (flags still hold the
    // CMP result, tested below for the dividend's sign).
    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);                  // hi quotient, remainder in EDX
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);                  // lo quotient
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      // Negative divisor: negate the 64-bit quotient.
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}
7948
// Remainder Register Long (remainder fit into 32 bits)
// 64-bit remainder by a 32-bit immediate, companion to divL_eReg_imm32
// above: same two-step unsigned DIV scheme, but keeps the final remainder
// (EDX after the last DIV) instead of the quotient.  The remainder takes
// the dividend's sign, so it is negated on the negative-dividend path and
// the high word is produced by sign-extending (SAR 31) at the end.
// Divisor is guaranteed != 0, -1, min_jint (asserted).
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;  // |imm|: divide is done unsigned
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    // High word already below the divisor: a single DIV suffices.
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    // Slow path: save lo, divide hi first (flags from CMP above still carry
    // the dividend's sign for the JLE below).
    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);                 // hi divide; remainder in EDX
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);                 // lo divide; remainder in EDX
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    // Move the 32-bit remainder into the low word and sign-extend the high.
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}
8015
// Integer Shift Instructions
// Shift Left by one
// One-bit form uses the short D1 /4 encoding (no immediate byte).
instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
// Variable shift count must be in CL (hardware constraint), hence eCXRegI.
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}
8052
// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
// Memory form: matches a load/shift/store of the same address so the
// whole read-modify-write collapses into one SAR with a memory operand.
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}
8074
// Arithmetic Shift Right by 8-bit immediate
// Register form of SAR reg, imm8 (C1 /7 ib).  Scheduled as ialu_reg to
// match its register-immediate siblings salI_eReg_imm and shrI_eReg_imm
// (it previously claimed the memory-immediate pipeline class by mistake;
// the encoding is unchanged — this affects only the scheduling model).
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_reg );
%}
8086
// Arithmetic Shift Right by 8-bit immediate
// Memory read-modify-write form: SAR [mem], imm8.
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
// Variable count comes from CL (hardware requirement).
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}
8109
// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}
8133
8134
// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
// Recognized as a sign-extending byte move (MOVSX), which requires a
// byte-addressable source register — hence xRegI for src.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
// Recognized as a sign-extending word move (MOVSX 16-bit).
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
8160
8161
// Logical Shift Right by variable
// Variable count comes from CL (hardware requirement).
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}
8173
8174
//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
// OpcSErm picks the sign-extended imm8 form (83 /4) when the constant
// fits in a byte, otherwise the full 81 /4 id form.
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
// Read-modify-write: load/and/store of the same address collapses into
// one AND [mem],reg (opcode 21, direction bit reversed from 23 above).
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
8238
// BMI1 instructions
// These match the ideal-graph idioms the BMI1 hardware instructions
// implement directly, guarded by the UseBMI1Instructions predicate:
//   ANDN   = (~src1) & src2      matched as (AndI (XorI src1 -1) src2)
//   BLSI   = (-src) & src        isolates lowest set bit
//   BLSMSK = (src-1) ^ src       mask up to and including lowest set bit
//   BLSR   = (src-1) & src       clears lowest set bit
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// ANDN with the second operand loaded from memory.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// BLSI with the operand loaded from memory (both LoadI nodes must be the
// same load for the idiom to hold).
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
8355
// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// OR with a pointer reinterpreted as an int (CastP2X) — same encoding,
// separate rule so the matcher accepts the pointer-typed input.
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
// OpcSErm selects the short sign-extended imm8 form when possible.
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
// Read-modify-write form: OR [mem],reg.
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
8429
// ROL/ROR
// ROL expand
// The rotate-left idiom (x << n) | (x >>> (32-n)) has no ideal node on
// this port; the match rules below recognize the OR-of-shifts pattern and
// expand into these encoding-only instructs (match-less, effect-driven).
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

// Variable rotate: count must be in CL, so dst is constrained to a
// non-ECX register (ncxRegI).
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate checks the two shift counts are complementary mod 32, i.e.
// the OR of shifts really is a rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
// Same rotate, written with (32 - shift) instead of (0 - shift); both
// are equivalent since shift counts are taken mod 32.
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}
8496
// ROR expand
// Mirror of the ROL expand rules above: encoding-only instructs that the
// rotate-right match rules expand into.
instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

// Variable rotate: count in CL, dst restricted to non-ECX registers.
instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Predicate checks the two counts are complementary mod 32.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}
8562
// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// x ^ -1 is bitwise complement: emitted as NOT, which — unlike XOR —
// leaves the flags untouched, so no eFlagsReg KILL is needed.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06); /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
// Read-modify-write form: XOR [mem],reg.
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6); /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
8635
//----------Convert Int to Boolean---------------------------------------------
// Conv2B(x) produces 0 if x == 0 and 1 otherwise.  Implemented as a copy
// followed by NEG (sets carry iff the value was non-zero) and ADC against
// the original, which leaves exactly 0 or 1 in dst.  The match rules below
// expand into these encoding-only helper instructs.

// Plain register copy helper for the expand rules.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG/ADC step of int Conv2B (0x13 is ADC r,r/m).
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer-typed variants of the same helpers, for Conv2B on a pointer.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}
8689
// CmpLTMask: dst = (p < q) ? -1 : 0, built branch-free from SETlt + NEG.
// dst is constrained to eCXRegI because SETcc writes only the low byte
// and needs one of EAX/EBX/ECX/EDX; p and q avoid ECX (ncxRegI).
// (Removed an unused `Label done;` local from the encode body — nothing
// ever bound or jumped to it.)
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    __ xorl(Rd, Rd);            // zero full register (SETcc writes 1 byte)
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd); // dst = (p < q) ? 1 : 0
    __ negl(Rd);                  // 1 -> -1 (all-ones mask), 0 -> 0
  %}

  ins_pipe(pipe_slow);
%}
8713
// CmpLTMask against zero: (dst < 0) ? -1 : 0 is just an arithmetic
// right shift by 31, which smears the sign bit across the register.
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}
8725
/* better to save a register than avoid a branch */
// Matches p = (p - q) + (((p - q) < 0) ? y : 0) expressed through
// CmpLTMask: subtract, then conditionally add y when the difference
// went negative.  Uses a short branch instead of a mask-and-add to
// avoid needing an extra temp register.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done); // p-q >= 0: mask is 0, no add
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}
8748
/* better to save a register than avoid a branch */
// Matches y = ((p < q) ? -1 : 0) & y: keep y when p < q, otherwise
// clear it.  Again a short branch replaces materializing the mask.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done); // p < q: mask is all ones, y unchanged
    __ xorl(Ry, Ry);                // otherwise mask is 0: clear y
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}
8773
/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These produce only the flags result (the OF bit) for Math.*Exact
// intrinsics; the arithmetic destination register is either clobbered
// (USE_KILL) or a scratch temp.

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);  // ADD destroys op1; only flags matter

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtraction overflow uses CMP, which sets the same flags as SUB but
// writes no register, so neither operand is killed.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Negation overflow (0 - x): NEG sets OF when x == min_jint.
instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Immediate form writes the product into a scratch temp so neither
// source operand is destroyed.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
8863
// Integer Absolute Instructions
// Branch-free abs: tmp = sign mask (0 or -1) from SAR 31, then
// (x ^ mask) - mask, which is x when x >= 0 and -x when x < 0.
// Note abs(min_jint) wraps to min_jint, matching Java Math.abs.
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
  %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);              // tmp = 0 or -1 (sign mask)
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);  // conditional complement
    __ subl($dst$$Register, $tmp$$Register);  // conditional +1
  %}

  ins_pipe(ialu_reg_reg);
%}
8885
//----------Long Instructions------------------------------------------------
// On 32-bit x86 a long lives in a register pair ($dst.lo / $dst.hi);
// 64-bit adds chain the carry from the low half into the high half
// with ADD followed by ADC, which is why EFLAGS is killed.

// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);  // 0x03 = ADD r32,r/m32 ; 0x13 = ADC r32,r/m32
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
// The high word of the operand is at $mem+4 (little-endian pair layout).
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
8921
// Subtract Long Register with Register.
// 64-bit subtract: SUB on the low half, then SBB propagates the borrow
// into the high half.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);  // 0x2B = SUB r32,r/m32 ; 0x1B = SBB r32,r/m32
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate long: 0 - dst. NEG both halves, then SBB corrects the high half
// for the borrow generated when the low half was non-zero.
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}
8965
// And Long Register with Register
// Bitwise ops have no cross-word carry, so the halves are independent ANDs.
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);  // AND r32,r/m32 for both halves
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
8999
// BMI1 instructions
// ANDN computes (~src1) & src2 in one instruction; the ideal graph shape
// matched here is (AndL (XorL src1 -1) src2), i.e. and-not. Applied
// independently to each 32-bit half of the long pair.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  // TEMP dst: dst.lo is written before the high halves of the sources are
  // read, so dst must not share a register pair with the sources.
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

// And-not with a long loaded from memory; the high word is read from
// the base address displaced by 4.
instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // Same addressing mode as $src2 but with the displacement bumped by 4
    // to reach the high word of the long.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}
9040
// BLSI: isolate lowest set bit, i.e. x & -x, matched from the ideal shape
// (AndL (SubL 0 src) src). For the 64-bit pair: zero dst.hi, BLSI the low
// word; if that produced a non-zero result (ZF clear) the lowest set bit
// was in the low word and the high half stays zero; otherwise the low word
// was zero and the isolated bit must come from the high word.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);   // low word held the lowest set bit
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Memory form of BLSI on a long; same two-halves algorithm, reading the
// high word from $src+4.
instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}
9092
// BLSMSK: mask up to and including the lowest set bit, i.e. x ^ (x - 1),
// matched from (XorL (AddL src -1) src). Per the Intel SDM, BLSMSK sets CF
// when its source is zero; here that means the implicit "x - 1" borrowed
// out of the low word, so the high half must be processed too (JNC skips
// it when the low word already contained a set bit).
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);   // low word non-zero: mask complete
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

// Memory form; high word read from $src+4.
instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}
9148
// BLSR: reset (clear) the lowest set bit, i.e. x & (x - 1), matched from
// (AndL (AddL src -1) src). The high half is pre-copied from the source;
// BLSR sets CF when its source is zero, so JNC (carry clear) skips the
// high-half BLSR when the cleared bit was in the low word.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));   // high half unchanged by default
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));  // low word was zero: clear in high word
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

// Memory form; high word read from $src+4.
instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}
9203
// Or Long Register with Register
// Bitwise OR on independent halves (no carry chain).
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);  // OR r32,r/m32 for both halves
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
9237
// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);  // XOR r32,r/m32 for both halves
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// x ^ -1 == ~x, so emit NOT on each half. NOT does not modify EFLAGS,
// which is why this is the only long bitwise rule with no KILL cr.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
9283
// Shift Left Long by 1
// A 64-bit left shift by one is a 64-bit add of the value to itself:
// ADD doubles the low half and ADC doubles the high half plus the carry.
// Shifts by 2 and 3 simply repeat the pair. Guarded by UseNewLongLShift.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}
9340
// Shift Left Long by 1-31
// SHLD shifts bits from the low word into the high word, then SHL shifts
// the low word; together they implement a 64-bit left shift by < 32.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
// A shift by >= 32 moves the low word into the high word, shifts it by
// (cnt - 32), and zeroes the low word.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
// Tests bit 5 of the count at runtime: if >= 32, first move lo into hi
// and clear lo, then do the < 32 SHLD/SHL sequence with the masked count.
// Shift count must be in ECX (eCXRegI) per the x86 variable-shift ABI.
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9381
// Shift Right Long by 1-31 (logical / unsigned)
// SHRD shifts bits from the high word into the low word, then SHR shifts
// the high word, zero-filling from the top.
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
// Move the high word down, shift it by (cnt - 32), zero the high word.
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
// Runtime test of bit 5 of the count selects the >= 32 prologue
// (hi -> lo, clear hi) before the shared SHRD/SHR tail.
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9422
// Shift Right Long by 1-31 (arithmetic / signed)
// Same SHRD pattern as the logical shift, but SAR on the high word
// replicates the sign bit instead of zero-filling.
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
// Move hi down, SAR it by (cnt - 32); the high word becomes the sign
// extension (SAR hi by 31 yields 0 or -1).
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9463
9464
//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
// FUCOMIP writes the unordered compare result directly into EFLAGS.
// The fixup after JNP forces CF when a NaN was seen (PF set), so an
// unordered compare is treated as "less than". EAX is killed because the
// fixup goes through AH/SAHF.
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Same compare but targeting eFlagsRegUCF (consumer only reads CF/ZF),
// so no NaN fixup is needed.
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
// Pre-P6 fallback: FCOM leaves its result in the FPU status word, which
// must be pulled into EFLAGS via FNSTSW AX / SAHF (hence KILL rax).
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
// CmpD3 materializes the three-way compare result as an int in $dst.
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);  // D9 E4 = FTST (compare ST(0) against +0.0)
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
9548
// float compare and set condition codes in EFLAGS by XMM regs
// UCOMISD sets PF on an unordered (NaN) compare; emit_cmpfp_fixup then
// rewrites the flags on the stack so unordered reads as "less than"
// (CF set) for consumers of the full unsigned-flags register.
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// UCF variant: consumer tolerates the raw UCOMISD flags, no NaN fixup.
instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpD_cc.
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
// emit_cmpfp3 converts the UCOMISD flags into -1/0/1 in $dst
// (unordered and "below" both produce -1, per the listed format).
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
9646
9647
// x87 double subtract: push src, then FSUBP folds it into dst on the
// FPU stack.
instruct subDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Subtract with an explicit store-and-round to a stack slot, matching
// the RoundDouble node (forces the 80-bit x87 result to 64-bit).
instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DSUB ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Subtract with a memory operand loaded onto the FPU stack first.
instruct subDPR_reg_mem(regDPR dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// FABS operates in place on ST(0); dst and src are both pinned to the
// top-of-stack register class (regDPR1).
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);  // D9 E1 = FABS
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// FCHS negates ST(0) in place.
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);  // D9 E0 = FCHS
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
9708
// x87 double add: push src, FADDP into dst.
instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Add then store-and-round to a stack slot (RoundDouble narrows the
// 80-bit x87 result to a 64-bit double).
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DADD ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Add with a memory operand loaded onto the FPU stack first.
instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DADDp $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory
// Read-modify-write: load dst, add src, store the rounded result back.
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D $dst\n\t"
            "DADD ST,$src\n\t"
            "FST_D $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}

// Add the constant 1.0 via FLD1 — no constant-table load needed.
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// Add a general double constant from the constant table. The predicate
// excludes 0.0 and 1.0, which are handled by cheaper rules.
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// Constant add with store-and-round to a stack slot.
instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
9808
// x87 double multiply: push src, FMULP into dst.
instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (MulD dst src));
  ins_cost(1); // Select this instruction for all FP double multiplies

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Multiply by a double constant from the constant table; 0.0 and 1.0
// are excluded by the predicate (handled elsewhere).
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}


// Multiply with a memory operand folded into the FPU-stack sequence.
instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
// Three-operand form: result goes to a distinct dst rather than folding
// back into a source register.
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
9889
9890
// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
// Matches src2 += src0 * src1 as one FPU-stack sequence.
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
// Matches src2 = src0 * src1 - src2 (reverse subtract, FSUBRP).
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));  // DE E0+i = FSUBRP ST(i),ST
  ins_pipe( fpu_reg_reg_reg );
%}
9923
9924
// x87 double divide: push src, FDIVP into dst.
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
9937
9938 // Strict FP instruction biases argument before division then
9939 // biases result, to avoid double rounding of subnormals.
9940 //
9941 // scale dividend by multiplying dividend by 2^(-15360)
9942 // load divisor
9943 // divide scaled dividend by divisor
9944 // rescale quotient by 2^(15360)
9945 //
// Strict-FP double divide on the x87 stack (UseSSE<=1). Pre-scales the
// dividend by a subnormal bias, divides, then rescales the quotient so
// subnormal results are not double-rounded (see comment block above).
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // Fixed: predicate was declared twice; the plain "UseSSE<=1" clause is
  // subsumed by this combined condition, so only one predicate remains.
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  // Very low cost so the matcher prefers this form whenever it applies.
  ins_cost(01);

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}
9965
// Double remainder on the x87 stack; the emitModDPR() FPREM loop
// clobbers EAX (status word) and EFLAGS, hence the KILL effects.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}
9979
// Double remainder for the SSE2 path. SSE has no remainder instruction,
// so the XMM operands are spilled to the stack and the x87 FPREM loop is
// used; the result is reloaded into the XMM dst and the FPU stack popped.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
          "\tMOVSD [ESP+0],$src1\n"
          "\tFLD_D [ESP+0]\n"
          "\tMOVSD [ESP+0],$src0\n"
          "\tFLD_D [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD $dst,[ESP+0]\n"
          "\tADD ESP,8\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}
10004
// Two-argument arctangent on the x87 stack; D9 F3 is FPATAN.
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}
10014
// Arctangent for the SSE2 path: operands are bounced through the stack
// to the x87 unit (FPATAN, D9 F3) and the result returned to XMM.
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}
10025
// Double square root on the x87 stack; D9 FA is FSQRT.
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}
10035
10036 //-------------Float Instructions-------------------------------
10037 // Float Math
10038
10039 // Code for float compare:
10040 // fcompp();
10041 // fwait(); fnstsw_ax();
10042 // sahf();
10043 // movl(dst, unordered_result);
10044 // jcc(Assembler::parity, exit);
10045 // movl(dst, less_result);
10046 // jcc(Assembler::below, exit);
10047 // movl(dst, equal_result);
10048 // jcc(Assembler::equal, exit);
10049 // movl(dst, greater_result);
10050 // exit:
10051
// P6 version of float compare, sets condition codes in EFLAGS
// FUCOMIP (DF E8+i) compares and sets EFLAGS directly; the fixup turns
// the unordered (NaN) outcome into "less than" via AH/SAHF, killing EAX.
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
     "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  // Push_Reg_DPR is reused: FLD ST(i) is size-agnostic, so the same
  // encoder works for float stack registers.
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Variant for an eFlagsRegUCF consumer: no NaN fixup needed, so EAX is
// untouched and the sequence is shorter/cheaper.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}
10082
10083
// Compare & branch
// Pre-P6 float compare: FCOMP then FNSTSW/SAHF to move the x87 status
// into EFLAGS; a NaN (unordered) result is forced to "less than".
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
    "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}
10103
// Compare vs zero into -1,0,1
// Uses FTST (D9 E4) against +0.0, then materializes -1/0/1 in dst.
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax); // CmpF_Result needs EAX for the status word
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
10117
// Compare into -1,0,1
// Three-way float compare on the x87 stack, result -1/0/1 in an int reg.
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax); // CmpF_Result needs EAX for the status word
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
10131
// float compare and set condition codes in EFLAGS by XMM regs
// UCOMISS plus a fixup that rewrites EFLAGS on the stack so an unordered
// (NaN) compare reads as "below" (CF set).
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Variant for an eFlagsRegUCF consumer: raw UCOMISS, no NaN fixup.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
10160
// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpF_cc: second operand folded in from memory.
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant for an eFlagsRegUCF consumer: no NaN fixup.
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}
10189
// Compare into -1,0,1 in XMM
// Three-way float compare for the SSE path; emit_cmpfp3 materializes
// -1/0/1 (NaN compares as -1 via the JP branch).
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
// Same as cmpF_reg but with the second operand folded in from memory.
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10229
// Spill to obtain 24-bit precision
// Float subtract whose result is stored to a stack slot, forcing the
// x87 extended result to be rounded to single (24-bit) precision.
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// 2-address register form: dst = dst - src, kept on the x87 stack.
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10254
// Spill to obtain 24-bit precision
// Float add spilled to a stack slot to round the result to 24 bits.
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// 2-address register form: dst = dst + src, kept on the x87 stack.
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10280
// Float absolute value in place on ST(0); D9 E1 is FABS.
// dst and src are both pinned to the top-of-stack register class.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Float negate in place on ST(0); D9 E0 is FCHS (change sign).
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
10300
// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
// Float add with memory operand, result spilled to round to 24 bits.
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
// Memory-operand form of the 2-address add: dst = dst + [src].
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
10329
// Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
// Mirror of addFPR24_reg_mem with the memory operand on the left.
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
10343
// Cisc-spill variant
// Spill to obtain 24-bit precision
// Both operands come from memory (first via cisc-spill), result rounded
// to 24 bits through a stack-slot store.
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
// Pure memory-memory add (both inputs already spilled), 24-bit rounded.
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}
10372
10373
// Spill to obtain 24-bit precision
// Float add with a constant-table immediate; result rounded to 24 bits
// by storing through a stack slot.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
// Same add-with-constant but the result stays in an x87 register.
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);      // pop into dst stack register (no rounding)
  %}
  ins_pipe(fpu_reg_reg_con);
%}
10403
// Spill to obtain 24-bit precision
// Float multiply spilled to a stack slot to round the result to 24 bits.
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// Register-register multiply; result popped into dst, not rounded.
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}
10433
10434
// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
// Float multiply with the second operand loaded from memory, result
// rounded to 24 bits via the stack-slot store.
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
// Same memory-operand multiply but the result stays in a register.
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
10464
// Spill to obtain 24-bit precision
// Memory-memory float multiply, result rounded to 24 bits.
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}
10478
// Spill to obtain 24-bit precision
// Float multiply by a constant-table immediate, rounded to 24 bits.
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
// Same multiply-by-constant but the result stays in an x87 register.
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);      // pop into dst stack register (no rounding)
  %}
  ins_pipe(fpu_reg_reg_con);
%}
10510
10511
10512 //
10513 // MACRO1 -- subsume unshared load into mulFPR
10514 // This instruction does not round to 24-bits
10515 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10516 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10517 match(Set dst (MulF (LoadF mem1) src));
10518
10519 format %{ "FLD $mem1 ===MACRO1===\n\t"
10520 "FMUL ST,$src\n\t"
10521 "FSTP $dst" %}
10522 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
10523 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10524 OpcReg_FPR(src),
10525 Pop_Reg_FPR(dst) );
10526 ins_pipe( fpu_reg_reg_mem );
10527 %}
10528 //
10529 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10530 // This instruction does not round to 24-bits
10531 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10532 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10533 match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10534 ins_cost(95);
10535
10536 format %{ "FLD $mem1 ===MACRO2===\n\t"
10537 "FMUL ST,$src1 subsume mulFPR left load\n\t"
10538 "FADD ST,$src2\n\t"
10539 "FSTP $dst" %}
10540 opcode(0xD9); /* LoadF D9 /0 */
10541 ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10542 FMul_ST_reg(src1),
10543 FAdd_ST_reg(src2),
10544 Pop_Reg_FPR(dst) );
10545 ins_pipe( fpu_reg_mem_reg_reg );
10546 %}
10547
// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
// Float analogue of addDPR_mulDPR_reg: src2 = src0*src1 + src2.
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}
10566
// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
// Computes dst = (src2 - src1) / src3 as one fused x87 sequence.
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}
10583
// Spill to obtain 24-bit precision
// Float divide whose result is stored to a stack slot, rounding to
// single (24-bit) precision.
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// 2-address register form: dst = dst / src, kept on the x87 stack.
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10608
10609
// Spill to obtain 24-bit precision
// Float remainder via the shared x87 FPREM loop, result rounded to
// 24 bits through the stack-slot store.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
// 2-address register form: dst = dst mod src, result kept in a register.
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}
10637
// Float remainder for the SSE path: XMM operands are bounced through the
// stack to the x87 FPREM loop (SSE has no remainder instruction).
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
          "\tMOVSS [ESP+0],$src1\n"
          "\tFLD_S [ESP+0]\n"
          "\tMOVSS [ESP+0],$src0\n"
          "\tFLD_S [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS $dst,[ESP+0]\n"
          "\tADD ESP,4\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}
10661
10662
10663 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all Alpha sorted. Please keep it that way!
10665
// Round an x87 float to single precision by storing it to a stack slot.
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round an x87 double to double precision by storing it to a stack slot.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
10683
// Force rounding to 24-bit precision and 6-bit exponent
// Double-to-float on the pure x87 path: expands to the stack-slot
// rounding instruct above.
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
// Double (x87) to float (XMM) when UseSSE==1: round through a 4-byte
// stack temp, then load into the XMM dst.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      // src is not already on top of the x87 stack: load it, store-pop.
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      // src is ST(0): store without popping.
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}
10716
// Force rounding double precision to single precision
// Pure SSE2 path: a single CVTSD2SS does the conversion and rounding.
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
10727
// Float-to-double on the pure x87 path: a register-to-register copy
// (x87 registers hold extended precision, so widening is free).
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// Float (x87) to double stack slot when UseSSE==1: expands to the
// double rounding instruct.
instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}
10744
// Float (XMM) to double (x87) when UseSSE==1: spill the XMM value to a
// 4-byte stack temp, load it onto the x87 stack, pop into dst.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}
10763
// Pure SSE2 float-to-double: single CVTSS2SD.
instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
10773
// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// x87 path: truncate via FISTP under a trunc rounding mode; the sentinel
// 0x80000000 result (overflow/NaN) diverts to the d2i_wrapper stub.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}
10793
// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// SSE2 path: CVTTSD2SI, with the 0x80000000 sentinel (overflow/NaN)
// diverting to the d2i_wrapper stub via the x87 stack.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: pass the argument to the wrapper on the x87 stack.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}
10822
// Double to long on the x87 path: 64-bit FISTP into a stack temp; the
// sentinel value 0x8000000000000000 (overflow/NaN) goes to d2l_wrapper.
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
10844
// XMM lacks a float/double->long conversion, so use the old FPU stack.
// SSE2 double-to-long: spill to the x87 stack, FISTP in trunc mode,
// then restore the rounding mode; the 0x8000000000000000 sentinel
// (overflow/NaN) diverts to the d2l_wrapper stub.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: re-push the argument on the x87 stack for the wrapper.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}
10897
// Convert a float to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned float down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
// x87-only (UseSSE==0) ConvF2I. Result is produced in EAX; EDX and the
// condition codes are clobbered. The 0x80000000 sentinel routes to the
// d2i_wrapper stub, which applies the Java corner-case semantics.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}
10923
// Convert a float in xmm to an int reg.
// Fast path: CVTTSS2SI truncates directly. The instruction produces
// 0x80000000 on overflow/NaN; in that case we reload the float onto
// the x87 stack and call the d2i_wrapper stub for Java semantics.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    // 0x80000000 is the hardware's "integer indefinite" overflow/NaN value.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: pass the operand to the stub on the x87 stack.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}
10952
// x87-only (UseSSE==0) ConvF2L: FISTP with truncation rounding into a
// 64-bit stack slot, popped into EDX:EAX. The 0x8000000000000000
// overflow/NaN sentinel routes to the d2l_wrapper stub.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
10975
// XMM lacks a float/double->long conversion, so use the old FPU stack.
// ConvF2L when UseSSE>=1: spill the XMM float to the stack, reload it
// onto the x87 stack and FISTP with truncation rounding into EDX:EAX.
// The 0x8000000000000000 sentinel (overflow/NaN) routes to d2l_wrapper.
// Note the asymmetric stack adjustments: 8 bytes for the 64-bit integer
// store, but only 4 bytes when respilling the float for the slow path.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // Move the XMM float through memory onto the x87 stack.
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    // Force round-toward-zero for the 64-bit integer store.
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // Sentinel check: only 0x80000000:00000000 takes the slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: hand the original operand to the stub on the x87 stack.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}
11028
// ConvI2D on the x87 stack (UseSSE<=1): FILD the int from its stack
// slot and pop into an FPU register. No rounding step is needed --
// every 32-bit int is exactly representable as a double.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11038
// ConvI2D in XMM via scalar CVTSI2SD; selected when the MOVD+CVTDQ2PD
// form is disabled (see convXI2D_reg for the UseXmmI2D variant).
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11048
// ConvI2D fused with the int load: CVTSI2SD straight from memory.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
11058
// ConvI2D via MOVD + packed CVTDQ2PD. Unlike CVTSI2SD, this form does
// not merge into the old upper bits of $dst, avoiding a partial-register
// dependence (selected by the UseXmmI2D flag).
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11072
// x87 ConvI2D fused with the int load: FILD straight from memory.
// Excluded in 24-bit mode (select_24_bit_instr), where loads must go
// through rounding-aware forms.
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11083
// Convert a byte to a float; no rounding step needed.
// Matches ConvI2F whose input is (AndI x 255), i.e. a byte-range value;
// such values fit exactly in a float mantissa, so the 24-bit rounding
// store used by the other UseSSE==0 forms can be skipped.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11095
// In 24-bit mode, force exponent rounding by storing back out
// to a float-sized stack slot (FSTP_S rounds the result to float).
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}
11108
// In 24-bit mode, force exponent rounding by storing back out
// (memory-operand variant: FILD straight from the loaded address).
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}
11121
// This instruction does not round to 24-bits
// (plain x87 ConvI2F outside 24-bit mode: FILD then pop to register).
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11133
// This instruction does not round to 24-bits
// (memory-operand variant, fused with the int load).
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11145
// Convert an int to a float in xmm; no rounding step needed.
// Selected for UseSSE==1, or for UseSSE>=2 when the MOVD+CVTDQ2PS form
// is disabled (see convXI2F_reg for the UseXmmI2F variant).
instruct convI2F_reg(regF dst, rRegI src) %{
  // Parenthesized for clarity; '&&' binds tighter than '||', so the
  // meaning is unchanged (and -Wparentheses-style warnings are avoided).
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11156
// ConvI2F via MOVD + packed CVTDQ2PS; avoids CVTSI2SS's merge into the
// old $dst bits (partial-register dependence). Selected by UseXmmI2F.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11170
// Sign-extend int to long: copy into both halves, then arithmetic
// shift of the high half by 31 replicates the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}
11181
// Zero-extend convert int to long
// Matches (AndL (ConvI2L src) 0xFFFFFFFF): copy the low half and XOR
// the high half to zero, which is why the flags are killed.
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}
11193
// Zero-extend long
// Matches (AndL src 0xFFFFFFFF): copy the low half, zero the high half
// with XOR (hence KILL flags).
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  // Dropped a stray trailing "\n\t" from the format template so the
  // PrintOptoAssembly output does not end with a dangling tab, matching
  // the sibling convI2L_reg_zex format.
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}
11205
// ConvL2D for UseSSE<=1: push the long's two halves, FILD the 64-bit
// integer from the stack, and store D-rounded into a stack slot.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}
11219
// ConvL2D for UseSSE>=2: the x87 unit does the 64-bit integer load
// (FILD_D); the rounded double is bounced through the stack into XMM.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}
11234
// ConvL2F for UseSSE>=1: x87 FILD_D does the integer load; FSTP_S
// rounds to float, then the result is bounced through the stack to XMM.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}
11249
// ConvL2F on the x87 stack with an F-rounding store to a stack slot.
// No predicate: matcher precedence lets convL2F_reg take the SSE cases.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
11262
// ConvL2I: simply copy the low 32 bits; the high half is discarded.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}
11270
// Bit-copy a float stack slot into an int register: a plain 32-bit
// load, no conversion (MoveF2I reinterprets the bits).
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}
11281
// MoveF2I from an x87 register (UseSSE==0): store the float bits to an
// int-typed stack slot with FST_S.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
11292
// MoveF2I from an XMM register to an int stack slot via MOVSS store.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11305
// MoveF2I register-to-register via MOVD (cheapest form; needs SSE2).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11317
// Bit-copy an int register into a float stack slot: plain 32-bit store.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
11329
11330
// MoveI2F into an x87 register (UseSSE==0): FLD_S from the stack slot,
// then pop into the FPU register.
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
11344
// MoveI2F into an XMM register via MOVSS load from the stack slot.
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}
11357
// MoveI2F register-to-register via MOVD (cheapest form; needs SSE2).
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11370
// Bit-copy a double stack slot into a long register pair: two 32-bit
// loads, low half from $src and high half from $src+4.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}
11382
// MoveD2L from an x87 register (UseSSE<=1): store the double bits to a
// long-typed stack slot with FST_D.
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
11393
// MoveD2L from XMM to a long stack slot via MOVSD store.
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11405
// MoveD2L register-to-register: MOVD the low 32 bits, then PSHUFLW with
// 0x4E swaps the 32-bit halves into $tmp so the second MOVD can extract
// the high 32 bits. $src is left untouched; $tmp is scratch.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    // 0x4E = 01_00_11_10b: rotate the four 16-bit lanes so the upper
    // 32 bits of the double land in the low dword of $tmp.
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11421
// Bit-copy a long register pair into a double stack slot: two 32-bit
// stores, low half to $dst and high half to $dst+4.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}
11433
11434
// MoveL2D into an x87 register (UseSSE<=1): FLD_D from the long stack
// slot, then pop into the FPU register.
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
11448
11449
// MoveL2D into XMM via MOVSD load; selected when the CPU prefers loads
// that also clear the upper half (UseXmmLoadAndClearUpper).
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}
11462
// MoveL2D into XMM when !UseXmmLoadAndClearUpper. The format shows
// MOVLPD while the encoding calls movdbl(); presumably movdbl() selects
// MOVLPD vs MOVSD based on UseXmmLoadAndClearUpper -- confirm in the
// MacroAssembler.
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}
11475
// MoveL2D register-to-register: MOVD each 32-bit half into an XMM
// register, then PUNPCKLDQ interleaves them so $dst holds hi:lo as the
// 64-bit double bits. TEMP dst because it is written before src is
// fully consumed; $tmp is scratch.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11491
11492
11493 // =======================================================================
11494 // fast clearing of an array
11495 // Small ClearArray non-AVX512.
11496 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11497 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11498 match(Set dummy (ClearArray cnt base));
11499 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11500
11501 format %{ $$template
11502 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
11503 $$emit$$"CMP InitArrayShortSize,rcx\n\t"
11504 $$emit$$"JG LARGE\n\t"
11505 $$emit$$"SHL ECX, 1\n\t"
11506 $$emit$$"DEC ECX\n\t"
11507 $$emit$$"JS DONE\t# Zero length\n\t"
11508 $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
11509 $$emit$$"DEC ECX\n\t"
11510 $$emit$$"JGE LOOP\n\t"
11511 $$emit$$"JMP DONE\n\t"
11512 $$emit$$"# LARGE:\n\t"
11513 if (UseFastStosb) {
11514 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
11515 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11516 } else if (UseXMMForObjInit) {
11517 $$emit$$"MOV RDI,RAX\n\t"
11518 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
11519 $$emit$$"JMPQ L_zero_64_bytes\n\t"
11520 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11521 $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11522 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11523 $$emit$$"ADD 0x40,RAX\n\t"
11524 $$emit$$"# L_zero_64_bytes:\n\t"
11525 $$emit$$"SUB 0x8,RCX\n\t"
11526 $$emit$$"JGE L_loop\n\t"
11527 $$emit$$"ADD 0x4,RCX\n\t"
11528 $$emit$$"JL L_tail\n\t"
11529 $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11530 $$emit$$"ADD 0x20,RAX\n\t"
11531 $$emit$$"SUB 0x4,RCX\n\t"
11532 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11533 $$emit$$"ADD 0x4,RCX\n\t"
11534 $$emit$$"JLE L_end\n\t"
11535 $$emit$$"DEC RCX\n\t"
11536 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11537 $$emit$$"VMOVQ XMM0,(RAX)\n\t"
11538 $$emit$$"ADD 0x8,RAX\n\t"
11539 $$emit$$"DEC RCX\n\t"
11540 $$emit$$"JGE L_sloop\n\t"
11541 $$emit$$"# L_end:\n\t"
11542 } else {
11543 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
11544 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11545 }
11546 $$emit$$"# DONE"
11547 %}
11548 ins_encode %{
11549 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11550 $tmp$$XMMRegister, false, knoreg);
11551 %}
11552 ins_pipe( pipe_slow );
11553 %}
11554
// Small ClearArray AVX512 non-constant length.
// Same as rep_stos but for UseAVX > 2: an opmask register ($ktmp) and a
// leg-restricted XMM temp are supplied so clear_mem() can use masked
// EVEX stores.
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large == false; pass the real opmask register for EVEX stores.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11615
// Large ClearArray non-AVX512.
// is_large() variant of rep_stos: no short-path check, clear_mem() is
// called with large=true and goes straight to the bulk-clear code.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large == true; no AVX512 mask register needed (knoreg).
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11665
// Large ClearArray AVX512.
// is_large() variant of rep_stos_evex: clear_mem() is called with
// large=true and a real opmask register for masked EVEX stores.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large == true; pass the real opmask register for EVEX stores.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11715
// Small ClearArray AVX512 constant length.
// $cnt is a compile-time constant, so clear_mem() is invoked with the
// immediate count ($cnt$$constant) and can fully unroll the clear.
// Requires AVX512VL+BW for the masked byte stores.
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
            ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
11730
// StrComp of two Latin-1 (byte[]) strings, non-AVX512vlbw path.
// All four inputs are consumed (USE_KILL); result in EAX.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11745
// StrComp of two Latin-1 (byte[]) strings when AVX512VL+BW is available:
// supplies an opmask register so string_compare() can use masked ops.
instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11760
// StrComp of two UTF-16 (char[]) strings, non-AVX512vlbw path.
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11775
// StrComp of two UTF-16 (char[]) strings with AVX512VL+BW masked ops.
instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11790
11791 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11792 eAXRegI result, regD tmp1, eFlagsReg cr) %{
11793 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11794 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11795 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11796
11797 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11798 ins_encode %{
11799 __ string_compare($str1$$Register, $str2$$Register,
11800 $cnt1$$Register, $cnt2$$Register, $result$$Register,
11801 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11802 %}
11803 ins_pipe( pipe_slow );
11804 %}
11805
11806 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11807 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11808 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11809 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11810 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11811
11812 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11813 ins_encode %{
11814 __ string_compare($str1$$Register, $str2$$Register,
11815 $cnt1$$Register, $cnt2$$Register, $result$$Register,
11816 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11817 %}
11818 ins_pipe( pipe_slow );
11819 %}
11820
11821 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11822 eAXRegI result, regD tmp1, eFlagsReg cr) %{
11823 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11824 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11825 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11826
11827 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11828 ins_encode %{
11829 __ string_compare($str2$$Register, $str1$$Register,
11830 $cnt2$$Register, $cnt1$$Register, $result$$Register,
11831 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11832 %}
11833 ins_pipe( pipe_slow );
11834 %}
11835
11836 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11837 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11838 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11839 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11840 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11841
11842 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11843 ins_encode %{
11844 __ string_compare($str2$$Register, $str1$$Register,
11845 $cnt2$$Register, $cnt1$$Register, $result$$Register,
11846 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11847 %}
11848 ins_pipe( pipe_slow );
11849 %}
11850
11851 // fast string equals
// StrEquals intrinsic: equality of two strings of the same byte length.
// Delegates to MacroAssembler::arrays_equals with is_array_equ=false and
// is_char=false (byte granularity); the EVEX flavor below adds a kReg mask.
11852 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11853 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11854 predicate(!VM_Version::supports_avx512vlbw());
11855 match(Set result (StrEquals (Binary str1 str2) cnt));
11856 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11857 
11858 format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11859 ins_encode %{
11860 __ arrays_equals(false, $str1$$Register, $str2$$Register,
11861 $cnt$$Register, $result$$Register, $tmp3$$Register,
11862 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11863 %}
11864 
11865 ins_pipe( pipe_slow );
11866 %}
11867 
// AVX-512 VL+BW flavor of string_equals with a kReg opmask temp.
11868 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11869 regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11870 predicate(VM_Version::supports_avx512vlbw());
11871 match(Set result (StrEquals (Binary str1 str2) cnt));
11872 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11873 
11874 format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11875 ins_encode %{
11876 __ arrays_equals(false, $str1$$Register, $str2$$Register,
11877 $cnt$$Register, $result$$Register, $tmp3$$Register,
11878 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11879 %}
11880 
11881 ins_pipe( pipe_slow );
11882 %}
11883
11884
11885 // fast search of substring with known size.
// StrIndexOf with a compile-time constant substring length (int_cnt2).
// Per encoding (LL/UU/UL): substrings of at least one SSE register's worth
// of elements (16 bytes for LL, 8 chars for UU/UL) go through
// string_indexofC8, which avoids loading the substring via the stack;
// shorter constant substrings fall back to the general string_indexof stub.
11886 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11887 eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11888 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11889 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11890 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11891 
11892 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11893 ins_encode %{
11894 int icnt2 = (int)$int_cnt2$$constant;
11895 if (icnt2 >= 16) {
11896 // IndexOf for constant substrings with size >= 16 elements
11897 // which don't need to be loaded through stack.
11898 __ string_indexofC8($str1$$Register, $str2$$Register,
11899 $cnt1$$Register, $cnt2$$Register,
11900 icnt2, $result$$Register,
11901 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11902 } else {
11903 // Small strings are loaded through stack if they cross page boundary.
11904 __ string_indexof($str1$$Register, $str2$$Register,
11905 $cnt1$$Register, $cnt2$$Register,
11906 icnt2, $result$$Register,
11907 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11908 }
11909 %}
11910 ins_pipe( pipe_slow );
11911 %}
11912 
// fast search of substring with known size.
// UTF-16/UTF-16 variant; threshold is 8 elements (8 chars = 16 bytes).
11914 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11915 eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11916 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11917 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11918 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11919 
11920 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11921 ins_encode %{
11922 int icnt2 = (int)$int_cnt2$$constant;
11923 if (icnt2 >= 8) {
11924 // IndexOf for constant substrings with size >= 8 elements
11925 // which don't need to be loaded through stack.
11926 __ string_indexofC8($str1$$Register, $str2$$Register,
11927 $cnt1$$Register, $cnt2$$Register,
11928 icnt2, $result$$Register,
11929 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11930 } else {
11931 // Small strings are loaded through stack if they cross page boundary.
11932 __ string_indexof($str1$$Register, $str2$$Register,
11933 $cnt1$$Register, $cnt2$$Register,
11934 icnt2, $result$$Register,
11935 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11936 }
11937 %}
11938 ins_pipe( pipe_slow );
11939 %}
11940 
// fast search of substring with known size.
// UTF-16 haystack / Latin1 needle variant.
11942 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11943 eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11944 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11945 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11946 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11947 
11948 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11949 ins_encode %{
11950 int icnt2 = (int)$int_cnt2$$constant;
11951 if (icnt2 >= 8) {
11952 // IndexOf for constant substrings with size >= 8 elements
11953 // which don't need to be loaded through stack.
11954 __ string_indexofC8($str1$$Register, $str2$$Register,
11955 $cnt1$$Register, $cnt2$$Register,
11956 icnt2, $result$$Register,
11957 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11958 } else {
11959 // Small strings are loaded through stack if they cross page boundary.
11960 __ string_indexof($str1$$Register, $str2$$Register,
11961 $cnt1$$Register, $cnt2$$Register,
11962 icnt2, $result$$Register,
11963 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11964 }
11965 %}
11966 ins_pipe( pipe_slow );
11967 %}
11968
// StrIndexOf with a runtime substring length: the constant-count argument
// to string_indexof is passed as -1 so the stub uses cnt2 instead.
// One instruct per encoding (LL / UU / UL), all requiring SSE4.2.
11969 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11970 eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11971 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11972 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11973 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11974 
11975 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11976 ins_encode %{
11977 __ string_indexof($str1$$Register, $str2$$Register,
11978 $cnt1$$Register, $cnt2$$Register,
11979 (-1), $result$$Register,
11980 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11981 %}
11982 ins_pipe( pipe_slow );
11983 %}
11984 
// UTF-16/UTF-16 variant.
11985 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11986 eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11987 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11988 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11989 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11990 
11991 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11992 ins_encode %{
11993 __ string_indexof($str1$$Register, $str2$$Register,
11994 $cnt1$$Register, $cnt2$$Register,
11995 (-1), $result$$Register,
11996 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11997 %}
11998 ins_pipe( pipe_slow );
11999 %}
12000 
// UTF-16 haystack / Latin1 needle variant.
12001 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12002 eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12003 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12004 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12005 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12006 
12007 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12008 ins_encode %{
12009 __ string_indexof($str1$$Register, $str2$$Register,
12010 $cnt1$$Register, $cnt2$$Register,
12011 (-1), $result$$Register,
12012 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12013 %}
12014 ins_pipe( pipe_slow );
12015 %}
12016
// StrIndexOfChar: find the first occurrence of a single character.
// U encoding = UTF-16 (string_indexof_char stub); L encoding below = Latin1
// (stringL_indexof_char stub). Both need three XMM temps and SSE4.2.
12017 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12018 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12019 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12020 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12021 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12022 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12023 ins_encode %{
12024 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12025 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12026 %}
12027 ins_pipe( pipe_slow );
12028 %}
12029 
// Latin1 flavor of the single-character IndexOf intrinsic.
12030 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12031 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12032 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12033 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12034 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12035 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12036 ins_encode %{
12037 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12038 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12039 %}
12040 ins_pipe( pipe_slow );
12041 %}
12042
12043
12044 // fast array equals
// AryEq intrinsic: whole-array equality (lengths read by the stub, hence
// is_array_equ=true). B = byte arrays (is_char=false), C = char arrays
// (is_char=true); each has a legacy and an AVX-512 VL+BW (*_evex) flavor.
12045 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12046 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12047 %{
12048 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12049 match(Set result (AryEq ary1 ary2));
12050 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12051 //ins_cost(300);
12052 
12053 format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12054 ins_encode %{
12055 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12056 $tmp3$$Register, $result$$Register, $tmp4$$Register,
12057 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12058 %}
12059 ins_pipe( pipe_slow );
12060 %}
12061 
// AVX-512 VL+BW flavor of array_equalsB with a kReg opmask temp.
12062 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12063 regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12064 %{
12065 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12066 match(Set result (AryEq ary1 ary2));
12067 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12068 //ins_cost(300);
12069 
12070 format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12071 ins_encode %{
12072 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12073 $tmp3$$Register, $result$$Register, $tmp4$$Register,
12074 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12075 %}
12076 ins_pipe( pipe_slow );
12077 %}
12078 
// char[] equality, legacy flavor (is_char=true).
12079 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12080 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12081 %{
12082 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12083 match(Set result (AryEq ary1 ary2));
12084 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12085 //ins_cost(300);
12086 
12087 format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12088 ins_encode %{
12089 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12090 $tmp3$$Register, $result$$Register, $tmp4$$Register,
12091 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12092 %}
12093 ins_pipe( pipe_slow );
12094 %}
12095 
// AVX-512 VL+BW flavor of array_equalsC with a kReg opmask temp.
12096 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12097 regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12098 %{
12099 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12100 match(Set result (AryEq ary1 ary2));
12101 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12102 //ins_cost(300);
12103 
12104 format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12105 ins_encode %{
12106 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12107 $tmp3$$Register, $result$$Register, $tmp4$$Register,
12108 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12109 %}
12110 ins_pipe( pipe_slow );
12111 %}
12112
// HasNegatives intrinsic: test whether a byte[] contains any byte with the
// sign bit set. The EVEX flavor needs BOTH AVX-512 VL+BW and BMI2 and
// supplies two kReg opmask temps; the legacy flavor covers every other case
// and passes knoreg twice.
12113 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12114 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12115 %{
12116 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12117 match(Set result (HasNegatives ary1 len));
12118 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12119 
12120 format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12121 ins_encode %{
12122 __ has_negatives($ary1$$Register, $len$$Register,
12123 $result$$Register, $tmp3$$Register,
12124 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12125 %}
12126 ins_pipe( pipe_slow );
12127 %}
12128 
// AVX-512 VL+BW + BMI2 flavor with two kReg opmask temps.
12129 instruct has_negatives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12130 regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12131 %{
12132 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12133 match(Set result (HasNegatives ary1 len));
12134 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12135 
12136 format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12137 ins_encode %{
12138 __ has_negatives($ary1$$Register, $len$$Register,
12139 $result$$Register, $tmp3$$Register,
12140 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12141 %}
12142 ins_pipe( pipe_slow );
12143 %}
12144
12145
12146 // fast char[] to byte[] compression
// StrCompressedCopy intrinsic: compress char[] (UTF-16) to byte[] (Latin1).
// Legacy flavor runs when AVX-512 VL+BW or BMI2 is missing and passes
// knoreg twice; the EVEX flavor below supplies two kReg opmask temps.
12147 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12148 regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12149 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12150 match(Set result (StrCompressedCopy src (Binary dst len)));
12151 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12152 
12153 format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12154 ins_encode %{
12155 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12156 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12157 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12158 knoreg, knoreg);
12159 %}
12160 ins_pipe( pipe_slow );
12161 %}
12162 
// AVX-512 VL+BW + BMI2 flavor with two kReg opmask temps.
12163 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12164 regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12165 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12166 match(Set result (StrCompressedCopy src (Binary dst len)));
12167 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12168 
12169 format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12170 ins_encode %{
12171 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12172 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12173 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12174 $ktmp1$$KRegister, $ktmp2$$KRegister);
12175 %}
12176 ins_pipe( pipe_slow );
12177 %}
12178
12179 // fast byte[] to char[] inflation
// StrInflatedCopy intrinsic: inflate byte[] (Latin1) to char[] (UTF-16).
// No value result (Universe dummy); legacy vs EVEX split mirrors
// string_compress above (EVEX needs AVX-512 VL+BW and BMI2, one kReg temp).
12180 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12181 regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12182 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12183 match(Set dummy (StrInflatedCopy src (Binary dst len)));
12184 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12185 
12186 format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12187 ins_encode %{
12188 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12189 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12190 %}
12191 ins_pipe( pipe_slow );
12192 %}
12193 
// AVX-512 VL+BW + BMI2 flavor with a kReg opmask temp.
12194 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12195 regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12196 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12197 match(Set dummy (StrInflatedCopy src (Binary dst len)));
12198 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12199 
12200 format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12201 ins_encode %{
12202 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12203 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12204 %}
12205 ins_pipe( pipe_slow );
12206 %}
12207
12208 // encode char[] to byte[] in ISO_8859_1
// EncodeISOArray intrinsic: encode char[] into byte[]. The two instructs
// share the same stub (encode_iso_array) and differ only in the node's
// is_ascii() flag, which is forwarded as the stub's final bool argument
// (false = ISO-8859-1, true = ASCII).
12209 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12210 regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12211 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12212 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12213 match(Set result (EncodeISOArray src (Binary dst len)));
12214 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12215 
12216 format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12217 ins_encode %{
12218 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12219 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12220 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12221 %}
12222 ins_pipe( pipe_slow );
12223 %}
12224 
// ASCII flavor: same stub with ascii=true.
12226 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12227 regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12228 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12229 predicate(((EncodeISOArrayNode*)n)->is_ascii());
12230 match(Set result (EncodeISOArray src (Binary dst len)));
12231 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12232 
12233 format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12234 ins_encode %{
12235 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12236 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12237 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12238 %}
12239 ins_pipe( pipe_slow );
12240 %}
12241
12242 //----------Control Flow Instructions------------------------------------------
12243 // Signed compare Instructions
// Signed int compare, register-register: CMP r32, r/m32 (opcode 0x3B).
12244 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12245 match(Set cr (CmpI op1 op2));
12246 effect( DEF cr, USE op1, USE op2 );
12247 format %{ "CMP    $op1,$op2" %}
12248 opcode(0x3B);  /* Opcode 3B /r */
12249 ins_encode( OpcP, RegReg( op1, op2) );
12250 ins_pipe( ialu_cr_reg_reg );
12251 %}
12252 
// Signed int compare against an immediate; OpcSErm/Con8or32 pick the
// sign-extended 8-bit form when the constant fits.
12253 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12254 match(Set cr (CmpI op1 op2));
12255 effect( DEF cr, USE op1 );
12256 format %{ "CMP    $op1,$op2" %}
12257 opcode(0x81,0x07);  /* Opcode 81 /7 */
12258 // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12259 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12260 ins_pipe( ialu_cr_reg_imm );
12261 %}
12262 
// Cisc-spilled version of cmpI_eReg
// (compare register directly against memory; costed higher).
12264 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12265 match(Set cr (CmpI op1 (LoadI op2)));
12266 
12267 format %{ "CMP    $op1,$op2" %}
12268 ins_cost(500);
12269 opcode(0x3B);  /* Opcode 3B /r */
12270 ins_encode( OpcP, RegMem( op1, op2) );
12271 ins_pipe( ialu_cr_reg_mem );
12272 %}
12273 
// Compare against zero via TEST reg,reg (sets flags without a constant).
12274 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12275 match(Set cr (CmpI src zero));
12276 effect( DEF cr, USE src );
12277 
12278 format %{ "TEST   $src,$src" %}
12279 opcode(0x85);
12280 ins_encode( OpcP, RegReg( src, src ) );
12281 ins_pipe( ialu_cr_reg_imm );
12282 %}
12283 
// (src & con) == 0 folded into TEST r/m32, imm32 (F7 /0).
12284 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12285 match(Set cr (CmpI (AndI src con) zero));
12286 
12287 format %{ "TEST   $src,$con" %}
12288 opcode(0xF7,0x00);
12289 ins_encode( OpcP, RegOpc(src), Con32(con) );
12290 ins_pipe( ialu_cr_reg_imm );
12291 %}
12292 
// (src & mem) == 0 folded into TEST with a memory operand.
12293 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12294 match(Set cr (CmpI (AndI src mem) zero));
12295 
12296 format %{ "TEST   $src,$mem" %}
12297 opcode(0x85);
12298 ins_encode( OpcP, RegMem( src, mem ) );
12299 ins_pipe( ialu_cr_reg_mem );
12300 %}
12301
12302 // Unsigned compare Instructions; really, same as signed except they
12303 // produce an eFlagsRegU instead of eFlagsReg.
// Unsigned int compare, register-register. Same CMP encoding as the
// signed forms; only the produced flags operand type (eFlagsRegU) differs.
12304 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12305 match(Set cr (CmpU op1 op2));
12306 
12307 format %{ "CMPu   $op1,$op2" %}
12308 opcode(0x3B);  /* Opcode 3B /r */
12309 ins_encode( OpcP, RegReg( op1, op2) );
12310 ins_pipe( ialu_cr_reg_reg );
12311 %}
12312 
// Unsigned int compare against an immediate.
12313 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12314 match(Set cr (CmpU op1 op2));
12315 
12316 format %{ "CMPu   $op1,$op2" %}
12317 opcode(0x81,0x07);  /* Opcode 81 /7 */
12318 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12319 ins_pipe( ialu_cr_reg_imm );
12320 %}
12321 
// // Cisc-spilled version of cmpU_eReg
12323 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12324 match(Set cr (CmpU op1 (LoadI op2)));
12325 
12326 format %{ "CMPu   $op1,$op2" %}
12327 ins_cost(500);
12328 opcode(0x3B);  /* Opcode 3B /r */
12329 ins_encode( OpcP, RegMem( op1, op2) );
12330 ins_pipe( ialu_cr_reg_mem );
12331 %}
12332 
// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu   $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}
12342 
// Unsigned compare against zero via TEST reg,reg.
12343 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12344 match(Set cr (CmpU src zero));
12345 
12346 format %{ "TESTu  $src,$src" %}
12347 opcode(0x85);
12348 ins_encode( OpcP, RegReg( src, src ) );
12349 ins_pipe( ialu_cr_reg_imm );
12350 %}
12351
12352 // Unsigned pointer compare Instructions
// Pointer compares are unsigned (eFlagsRegU): addresses have no sign.
12353 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12354 match(Set cr (CmpP op1 op2));
12355 
12356 format %{ "CMPu   $op1,$op2" %}
12357 opcode(0x3B);  /* Opcode 3B /r */
12358 ins_encode( OpcP, RegReg( op1, op2) );
12359 ins_pipe( ialu_cr_reg_reg );
12360 %}
12361 
// Pointer compare against an immediate pointer constant.
12362 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12363 match(Set cr (CmpP op1 op2));
12364 
12365 format %{ "CMPu   $op1,$op2" %}
12366 opcode(0x81,0x07);  /* Opcode 81 /7 */
12367 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12368 ins_pipe( ialu_cr_reg_imm );
12369 %}
12370 
// // Cisc-spilled version of cmpP_eReg
12372 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12373 match(Set cr (CmpP op1 (LoadP op2)));
12374 
12375 format %{ "CMPu   $op1,$op2" %}
12376 ins_cost(500);
12377 opcode(0x3B);  /* Opcode 3B /r */
12378 ins_encode( OpcP, RegMem( op1, op2) );
12379 ins_pipe( ialu_cr_reg_mem );
12380 %}
12381 
// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu   $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}
12391 
// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
// The predicate checks the loaded pointer needs no relocation.
12395 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12396 predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12397 match(Set cr (CmpP op1 (LoadP op2)));
12398 
12399 format %{ "CMPu   $op1,$op2" %}
12400 opcode(0x3B);  /* Opcode 3B /r */
12401 ins_encode( OpcP, RegMem( op1, op2) );
12402 ins_pipe( ialu_cr_reg_mem );
12403 %}
12404 
//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
12408 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12409 match(Set cr (CmpP src zero));
12410 
12411 format %{ "TEST   $src,$src" %}
12412 opcode(0x85);
12413 ins_encode( OpcP, RegReg( src, src ) );
12414 ins_pipe( ialu_cr_reg_imm );
12415 %}
12416 
// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// Uses TEST m32, 0xFFFFFFFF (F7 /0) so the null check reads memory directly.
12420 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12421 match(Set cr (CmpP (LoadP op) zero));
12422 
12423 format %{ "TEST   $op,0xFFFFFFFF" %}
12424 ins_cost(500);
12425 opcode(0xF7);               /* Opcode F7 /0 */
12426 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12427 ins_pipe( ialu_cr_reg_imm );
12428 %}
12429
12430 // Yanked all unsigned pointer compare operations.
12431 // Pointer compares are done with CmpP which is already unsigned.
12432
12433 //----------Max and Min--------------------------------------------------------
12434 // Min Instructions
12435 ////
12436 // *** Min and Max using the conditional move are slower than the
12437 // *** branch version on a Pentium III.
12438 // // Conditional move for min
12439 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12440 // effect( USE_DEF op2, USE op1, USE cr );
12441 // format %{ "CMOVlt $op2,$op1\t! min" %}
12442 // opcode(0x4C,0x0F);
12443 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12444 // ins_pipe( pipe_cmov_reg );
12445 //%}
12446 //
12447 //// Min Register with Register (P6 version)
12448 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12449 // predicate(VM_Version::supports_cmov() );
12450 // match(Set op2 (MinI op1 op2));
12451 // ins_cost(200);
12452 // expand %{
12453 // eFlagsReg cr;
12454 // compI_eReg(cr,op1,op2);
12455 // cmovI_reg_lt(op2,op1,cr);
12456 // %}
12457 //%}
12458
12459 // Min Register with Register (generic version)
// Min Register with Register (generic version)
// dst = min(dst, src); clobbers flags. The whole encoding is produced by
// the min_enc encoding class — opcode 0xCC (int3) appears to be an unused
// placeholder here; confirm against the min_enc definition earlier in the file.
12460 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12461 match(Set dst (MinI dst src));
12462 effect(KILL flags);
12463 ins_cost(300);
12464 
12465 format %{ "MIN    $dst,$src" %}
12466 opcode(0xCC);
12467 ins_encode( min_enc(dst,src) );
12468 ins_pipe( pipe_slow );
12469 %}
12470
12471 // Max Register with Register
12472 // *** Min and Max using the conditional move are slower than the
12473 // *** branch version on a Pentium III.
12474 // // Conditional move for max
12475 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12476 // effect( USE_DEF op2, USE op1, USE cr );
12477 // format %{ "CMOVgt $op2,$op1\t! max" %}
12478 // opcode(0x4F,0x0F);
12479 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12480 // ins_pipe( pipe_cmov_reg );
12481 //%}
12482 //
12483 // // Max Register with Register (P6 version)
12484 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12485 // predicate(VM_Version::supports_cmov() );
12486 // match(Set op2 (MaxI op1 op2));
12487 // ins_cost(200);
12488 // expand %{
12489 // eFlagsReg cr;
12490 // compI_eReg(cr,op1,op2);
12491 // cmovI_reg_gt(op2,op1,cr);
12492 // %}
12493 //%}
12494
12495 // Max Register with Register (generic version)
// Max Register with Register (generic version)
// dst = max(dst, src); clobbers flags. Mirror image of minI_eReg above —
// encoding comes entirely from max_enc; opcode 0xCC appears unused.
12496 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12497 match(Set dst (MaxI dst src));
12498 effect(KILL flags);
12499 ins_cost(300);
12500 
12501 format %{ "MAX    $dst,$src" %}
12502 opcode(0xCC);
12503 ins_encode( max_enc(dst,src) );
12504 ins_pipe( pipe_slow );
12505 %}
12506
12507 // ============================================================================
12508 // Counted Loop limit node which represents exact final iterator value.
12509 // Note: the resulting value should fit into integer range since
12510 // counted loops have limit check on overflow.
// LoopLimit: computes the exact final iterator value
//   limit = init + stride * ((limit - init + stride - 1) / stride)
// using 64-bit intermediate arithmetic in EAX:EDX (hence the fixed
// register operands) so the subtraction cannot overflow 32 bits.
// Note stride is a compile-time constant and may not be +/-1 (asserted).
12511 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12512 match(Set limit (LoopLimit (Binary init limit) stride));
12513 effect(TEMP limit_hi, TEMP tmp, KILL flags);
12514 ins_cost(300);
12515 
12516 format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12517 ins_encode %{
12518 int strd = (int)$stride$$constant;
12519 assert(strd != 1 && strd != -1, "sanity");
12520 int m1 = (strd > 0) ? 1 : -1;
12521 // Convert limit to long (EAX:EDX)
12522 __ cdql();
12523 // Convert init to long (init:tmp)
12524 __ movl($tmp$$Register, $init$$Register);
12525 __ sarl($tmp$$Register, 31);
12526 // $limit - $init
12527 __ subl($limit$$Register, $init$$Register);
12528 __ sbbl($limit_hi$$Register, $tmp$$Register);
12529 // + ($stride - 1)
12530 if (strd > 0) {
12531 __ addl($limit$$Register, (strd - 1));
12532 __ adcl($limit_hi$$Register, 0);
12533 __ movl($tmp$$Register, strd);
12534 } else {
12535 __ addl($limit$$Register, (strd + 1));
12536 __ adcl($limit_hi$$Register, -1);
12537 // Negative stride: negate the 64-bit value so the divide sees a
12538 // positive dividend, then divide by |stride|.
12539 __ lneg($limit_hi$$Register, $limit$$Register);
12540 __ movl($tmp$$Register, -strd);
12541 }
12542 // signed division: (EAX:EDX) / pos_stride
12543 __ idivl($tmp$$Register);
12544 if (strd < 0) {
12545 // restore sign
12546 __ negl($tmp$$Register);
12547 }
12548 // (EAX) * stride
12549 __ mull($tmp$$Register);
12550 // + init (ignore upper bits)
12551 __ addl($limit$$Register, $init$$Register);
12552 %}
ins_pipe( pipe_slow );
%}
12553
12554 // ============================================================================
12555 // Branch Instructions
12556 // Jump Table
12557 instruct jumpXtnd(rRegI switch_val) %{
12558 match(Jump switch_val);
12559 ins_cost(350);
12560 format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
12561 ins_encode %{
12562 // Jump to Address(table_base + switch_reg)
12563 Address index(noreg, $switch_val$$Register, Address::times_1);
12564 __ jump(ArrayAddress($constantaddress, index));
12565 %}
12566 ins_pipe(pipe_jmp);
12567 %}
12568
// Jump Direct - Label defines a relative address from JMP+1
// Unconditional long-form (32-bit displacement) jump; the short-branch
// replacement pass may substitute jmpDir_short.
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);  // 1 opcode byte + 4 displacement bytes
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}
12583
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Long-form conditional jump on signed flags.
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);  // 2 opcode bytes (0F 8x) + 4 displacement bytes
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}
12598
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Back-branch of a counted loop, signed comparison; excluded when the loop
// has a vector mask set (see the *_and_restoreMask variants below).
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}
12614
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Counted-loop back-branch, unsigned comparison variant.
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}
12630
// Counted-loop back-branch, unsigned comparison on carry-flag-only (UCF)
// flags; cheaper cost than jmpLoopEndU so it is preferred when applicable.
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}
12645
// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Bounded mask operand used in the following pattern is needed for
// post-loop multiversioning.
// Counted-loop back-branch that also restores the vector mask (k1) after
// the loop; selected only when PostLoopMultiversioning is active.
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, eFlagsReg cr, label labl) %{
  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl, TEMP ktmp);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);  // 6-byte Jcc + mask-restore sequence
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask($ktmp$$KRegister);
  %}
  ins_pipe( pipe_jcc );
%}
12666
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Bounded mask operand used in the following pattern is needed for
// post-loop multiversioning.
// Unsigned-comparison variant of jmpLoopEnd_and_restoreMask.
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, eFlagsRegU cmp, label labl) %{
  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl, TEMP ktmp);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask($ktmp$$KRegister);
  %}
  ins_pipe( pipe_jcc );
%}
12686
// Bounded mask operand used in the following pattern is needed for
// post-loop multiversioning.
// Carry-flag-only unsigned variant of the mask-restoring loop-end branch.
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, eFlagsRegUCF cmp, label labl) %{
  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl, TEMP ktmp);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask($ktmp$$KRegister);
  %}
  ins_pipe( pipe_jcc );
%}
12705
// Jump Direct Conditional - using unsigned comparison
// Long-form conditional jump on unsigned flags.
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
12720
// Conditional jump on carry-flag-only (UCF) unsigned flags; lower cost than
// jmpConU so it is preferred when the flags producer only sets CF/ZF.
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
12734
// Conditional jump for float-compare results where the parity flag (PF,
// set on unordered/NaN) must be folded into EQ/NE:
//   NE: branch if PF or NE (unordered counts as "not equal").
//   EQ: skip over the equality branch when PF is set (unordered is not equal).
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // Unordered (PF set) or not-equal both take the branch.
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Unordered must NOT take the equal branch: hop over it on PF.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
12766
12767 // ============================================================================
12768 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
12769 // array for an instance of the superklass. Set a hidden internal cache on a
12770 // hit (cache is checked with exposed code in gen_subtype_check()). Return
12771 // NZ for a miss or zero for a hit. The encoding ALSO sets flags.
12772 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12773 match(Set result (PartialSubtypeCheck sub super));
12774 effect( KILL rcx, KILL cr );
12775
12776 ins_cost(1100); // slightly larger than the next version
12777 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
12778 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12779 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12780 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12781 "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
12782 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12783 "XOR $result,$result\t\t Hit: EDI zero\n\t"
12784 "miss:\t" %}
12785
12786 opcode(0x1); // Force a XOR of EDI
12787 ins_encode( enc_PartialSubtypeCheck() );
12788 ins_pipe( pipe_slow );
12789 %}
12790
// Flags-only variant: matches a PartialSubtypeCheck compared against null,
// so only the Z/NZ result is needed and the XOR of EDI can be skipped.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}
12808
12809 // ============================================================================
12810 // Branch Instructions -- short offset versions
12811 //
12812 // These instructions are used to replace jumps of a long offset (the default
12813 // match) with jumps of a shorter offset. These instructions are all tagged
12814 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12815 // match rules in general matching. Instead, the ADLC generates a conversion
12816 // method in the MachNode which can be used to do in-place replacement of the
12817 // long variant with the shorter variant. The compiler will determine if a
12818 // branch can be taken by the is_short_branch_offset() predicate in the machine
12819 // specific code section of the file.
12820
// Jump Direct - Label defines a relative address from JMP+1
// Short (8-bit displacement) form of jmpDir; substituted by the
// short-branch replacement pass (ins_short_branch).
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);  // 1 opcode byte + 1 displacement byte
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}
12836
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of jmpCon (8-bit displacement Jcc).
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}
12852
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of jmpLoopEnd.
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}
12868
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of jmpLoopEndU (unsigned).
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}
12884
// Short form of jmpLoopEndUCF (carry-flag-only unsigned).
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}
12899
// Jump Direct Conditional - using unsigned comparison
// Short form of jmpConU.
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}
12915
// Short form of jmpConUCF.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}
12930
// Short form of jmpConUCF2: parity-aware EQ/NE branch for float compares,
// emitted entirely with 2-byte short jumps (see jmpConUCF2 for the logic).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);  // two 2-byte short conditional jumps
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // Unordered (PF) or not-equal both branch to the target.
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Unordered must not take the equal branch.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12964
12965 // ============================================================================
12966 // Long Compare
12967 //
12968 // Currently we hold longs in 2 registers. Comparing such values efficiently
12969 // is tricky. The flavor of compare used depends on whether we are testing
12970 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
12971 // The GE test is the negated LT test. The LE test can be had by commuting
12972 // the operands (yielding a GE test) and then negating; negate again for the
12973 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the
12974 // NE test is negated from that.
12975
12976 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12977 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
12978 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections
12979 // are collapsed internally in the ADLC's dfa-gen code. The match for
12980 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12981 // foo match ends up with the wrong leaf. One fix is to not match both
12982 // reg-reg and reg-zero forms of long-compare. This is unfortunate because
12983 // both forms beat the trinary form of long-compare and both are very useful
12984 // on Intel which has so few registers.
12985
// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Produces -1/0/+1 in dst for a three-way signed 64-bit compare:
// compare high words signed, then low words unsigned to break ties.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // High halves: signed comparison decides unless they are equal.
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // High halves equal: low halves compare UNSIGNED (below, not less).
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}
13021
13022 //======
13023 // Manifest a CmpL result in the normal flags. Only good for LT or GE
13024 // compares. Can be used for LE or GT compares by reversing arguments.
13025 // NOT GOOD FOR EQ/NE tests.
13026 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13027 match( Set flags (CmpL src zero ));
13028 ins_cost(100);
13029 format %{ "TEST $src.hi,$src.hi" %}
13030 opcode(0x85);
13031 ins_encode( OpcP, RegReg_Hi2( src, src ) );
13032 ins_pipe( ialu_cr_reg_reg );
13033 %}
13034
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// CMP low halves then SBB high halves: the final SF/OF reflect the signed
// 64-bit ordering, but ZF is meaningless (hence no EQ/NE).
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13048
// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
// The predicate restricts this rule to LT/GE Bool tests, matching the
// flags produced by the cmpL_*_flags_LTGE rules above.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}
13059
13060 //======
13061 // Manifest a CmpUL result in the normal flags. Only good for LT or GE
13062 // compares. Can be used for LE or GT compares by reversing arguments.
13063 // NOT GOOD FOR EQ/NE tests.
13064 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13065 match(Set flags (CmpUL src zero));
13066 ins_cost(100);
13067 format %{ "TEST $src.hi,$src.hi" %}
13068 opcode(0x85);
13069 ins_encode(OpcP, RegReg_Hi2(src, src));
13070 ins_pipe(ialu_cr_reg_reg);
13071 %}
13072
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Same CMP/SBB sequence as the signed version; the consumer interprets
// the flags with unsigned condition codes.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
13086
// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
// Restricted to LT/GE Bool tests to match the cmpUL_*_flags_LTGE producers.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl); // JLT or JGE...
  %}
%}
13097
// Compare 2 longs and CMOVE longs.
// Conditionally moves both 32-bit halves of a long on LT/GE long-compare
// flags; requires hardware CMOV support.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);  // CMOVcc base opcode
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13109
// Memory-source variant of cmovLL_reg_LTGE: conditionally loads both halves
// of a long from memory on LT/GE long-compare flags.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
13120
// Compare 2 longs and CMOVE ints.
// Single CMOVcc of an int on LT/GE long-compare flags.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13131
// Memory-source variant: conditionally loads an int on LT/GE long-compare flags.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
13141
// Compare 2 longs and CMOVE pointers.
// Single CMOVcc of a pointer on LT/GE long-compare flags.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13152
// Compare 2 longs and CMOVE doubles (x87 form; UseSSE<=1 only).
// Fix: parenthesize the BoolTest disjunction. '&&' binds tighter than '||',
// so the old predicate applied the UseSSE<=1 guard only to the 'lt'
// alternative, letting this x87 rule match 'ge' even when UseSSE>=2.
// The parallel integer rules (e.g. cmovII_reg_LTGE) already parenthesize.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13162
// Compare 2 longs and CMOVE doubles (SSE2 form; UseSSE>=2 only).
// Fix: parenthesize the BoolTest disjunction so the UseSSE>=2 guard covers
// both the 'lt' and 'ge' alternatives (previously '&&' bound only to 'lt').
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13172
// Compare 2 longs and CMOVE floats (x87 form; UseSSE==0 only).
// Fix: parenthesize the BoolTest disjunction so the UseSSE==0 guard covers
// both the 'lt' and 'ge' alternatives (previously '&&' bound only to 'lt').
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13181
// Compare 2 longs and CMOVE floats (SSE form; UseSSE>=1 only).
// Fix: parenthesize the BoolTest disjunction so the UseSSE>=1 guard covers
// both the 'lt' and 'ge' alternatives (previously '&&' bound only to 'lt').
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13190
13191 //======
13192 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13193 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13194 match( Set flags (CmpL src zero ));
13195 effect(TEMP tmp);
13196 ins_cost(200);
13197 format %{ "MOV $tmp,$src.lo\n\t"
13198 "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13199 ins_encode( long_cmp_flags0( src, tmp ) );
13200 ins_pipe( ialu_reg_reg_long );
13201 %}
13202
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Compare low halves; only if they are equal does the high-half compare
// run, so ZF ends up reflecting full 64-bit equality.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13214
// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
// Restricted to EQ/NE Bool tests to match the cmpL_*_flags_EQNE producers.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}
13225
13226 //======
13227 // Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
13228 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13229 match(Set flags (CmpUL src zero));
13230 effect(TEMP tmp);
13231 ins_cost(200);
13232 format %{ "MOV $tmp,$src.lo\n\t"
13233 "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13234 ins_encode(long_cmp_flags0(src, tmp));
13235 ins_pipe(ialu_reg_reg_long);
13236 %}
13237
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Equality is sign-agnostic; same CMP/JNE/CMP sequence as the signed version.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}
13249
// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
// Restricted to EQ/NE Bool tests to match the cmpUL_*_flags_EQNE producers.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl); // JEQ or JNE...
  %}
%}
13260
// Compare 2 longs and CMOVE longs.
// EQ/NE analogue of cmovLL_reg_LTGE: moves both halves on EQ/NE flags.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13272
// Memory-source variant: conditionally loads both halves of a long on
// EQ/NE long-compare flags.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
13283
// Compare 2 longs and CMOVE ints.
// Single CMOVcc of an int on EQ/NE long-compare flags.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13294
// Memory-source variant: conditionally loads an int on EQ/NE long-compare flags.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
13304
// Compare 2 longs and CMOVE pointers.
// Single CMOVcc of a pointer on EQ/NE long-compare flags.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13315
// Compare 2 longs and CMOVE doubles (x87 form; UseSSE<=1 only).
// Fix: parenthesize the BoolTest disjunction. '&&' binds tighter than '||',
// so the old predicate applied the UseSSE<=1 guard only to the 'eq'
// alternative, letting this x87 rule match 'ne' even when UseSSE>=2.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13325
// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  // SSE2 (UseSSE>=2) conditional move of a double after a long eq/ne compare.
  // Fix: parenthesize the eq/ne disjunction so the UseSSE>=2 guard covers both
  // tests; previously '&&'/'||' precedence made it (UseSSE>=2 && eq) || ne,
  // letting the rule match a BoolTest::ne CMoveD even without SSE2.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expand into the generic XMM fcmov rule rather than encoding directly.
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13335
// Compare 2 longs and CMOVE floats (x87 form).
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  // x87-only (UseSSE==0) conditional move of a float after a long eq/ne compare.
  // Fix: parenthesize the eq/ne disjunction; the unparenthesized form parsed as
  // (UseSSE==0 && eq) || ne, allowing this x87 rule to match a BoolTest::ne
  // CMoveF even with SSE enabled.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expand into the generic x87 fcmov rule rather than encoding directly.
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13344
// Compare 2 longs and CMOVE floats (SSE form).
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  // SSE (UseSSE>=1) conditional move of a float after a long eq/ne compare.
  // Fix: parenthesize the eq/ne disjunction so UseSSE>=1 guards both tests;
  // the unparenthesized form parsed as (UseSSE>=1 && eq) || ne.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expand into the generic XMM fcmov rule rather than encoding directly.
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13353
13354 //======
13355 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13356 // Same as cmpL_reg_flags_LEGT except must negate src
13357 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13358 match( Set flags (CmpL src zero ));
13359 effect( TEMP tmp );
13360 ins_cost(300);
13361 format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13362 "CMP $tmp,$src.lo\n\t"
13363 "SBB $tmp,$src.hi\n\t" %}
13364 ins_encode( long_cmp_flags3(src, tmp) );
13365 ins_pipe( ialu_reg_reg_long );
13366 %}
13367
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  // tmp holds src2.hi during the high-word subtract-with-borrow.
  effect( TEMP tmp );
  ins_cost(300);
  // CMP on the low words establishes the borrow; SBB on the high words then
  // leaves flags valid for the commuted (src2 vs src1) LE/GT test.
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13381
// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // LEGT flags are only valid for gt/le tests; the cmpOp_commute operand
  // emits the condition already commuted to match the swapped compare.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  // Delegates to the generic conditional-jump rule.
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}
13393
13394 //======
13395 // Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
13396 // Same as cmpUL_reg_flags_LEGT except must negate src
13397 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13398 match(Set flags (CmpUL src zero));
13399 effect(TEMP tmp);
13400 ins_cost(300);
13401 format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13402 "CMP $tmp,$src.lo\n\t"
13403 "SBB $tmp,$src.hi\n\t" %}
13404 ins_encode(long_cmp_flags3(src, tmp));
13405 ins_pipe(ialu_reg_reg_long);
13406 %}
13407
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  // tmp holds src2.hi during the high-word subtract-with-borrow.
  effect(TEMP tmp);
  ins_cost(300);
  // Same CMP/MOV/SBB shape as the signed rule; uses the same encoding helper,
  // only the flag register class (ulong) differs so unsigned tests consume it.
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
13421
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // LEGT flags are only valid for gt/le tests; cmpOpU_commute supplies the
  // already-commuted unsigned condition code.
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  // Delegates to the generic conditional-jump rule.
  expand %{
    jmpCon(cmp, flags, labl); // JGT or JLE...
  %}
%}
13433
// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  // Requires CMOVcc and a le/gt test — the only conditions LEGT flags are valid for.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  // A 64-bit conditional move is two 32-bit CMOVs (low half, then high half).
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13445
// Same as cmovLL_reg_LEGT but the source is a 64-bit load from memory.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  // Two CMOVcc reg,mem moves; the high half reads at displacement +4.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
13456
// Unsigned-flags variant of cmovLL_reg_LEGT (consumes flagsReg_ulong_LEGT).
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  // 64-bit conditional move = two 32-bit CMOVs (low half, then high half).
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13467
// Unsigned-flags variant of cmovLL_mem_LEGT (consumes flagsReg_ulong_LEGT).
instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  // Two CMOVcc reg,mem moves; the high half reads at displacement +4.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
13478
// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  // Requires CMOVcc and a le/gt test — the only conditions LEGT flags are valid for.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13489
// Same as cmovII_reg_LEGT, but the source is loaded from memory (CMOVcc reg,mem).
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  // Costlier than the reg-reg form because of the memory operand.
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
13499
// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  // Pointer conditional move after a long le/gt compare; same CMOVcc encoding
  // as the int variant (pointers are 32-bit here).
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13510
// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  // x87 (UseSSE<=1) conditional move of a double after a long le/gt compare.
  // Fix: parenthesize the le/gt disjunction.  '&&' binds tighter than '||',
  // so the unparenthesized form parsed as (UseSSE<=1 && le) || gt, allowing
  // this x87 rule to match a BoolTest::gt CMoveD even when UseSSE >= 2.
  // The integer/pointer LEGT rules above already use the parenthesized form.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expand into the generic x87 fcmov rule rather than encoding directly.
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13520
// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  // SSE2 (UseSSE>=2) conditional move of a double after a long le/gt compare.
  // Fix: parenthesize the le/gt disjunction so the UseSSE>=2 guard covers both
  // tests; previously precedence made it (UseSSE>=2 && le) || gt.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expand into the generic XMM fcmov rule rather than encoding directly.
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13530
// Compare 2 longs and CMOVE floats (x87 form).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  // x87-only (UseSSE==0) conditional move of a float after a long le/gt compare.
  // Fix: parenthesize the le/gt disjunction; the unparenthesized form parsed as
  // (UseSSE==0 && le) || gt, allowing this x87 rule to match a BoolTest::gt
  // CMoveF even with SSE enabled.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expand into the generic x87 fcmov rule rather than encoding directly.
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13539
13540
// Compare 2 longs and CMOVE floats (SSE form).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  // SSE (UseSSE>=1) conditional move of a float after a long le/gt compare.
  // Fix: parenthesize the le/gt disjunction so UseSSE>=1 guards both tests;
  // the unparenthesized form parsed as (UseSSE>=1 && le) || gt.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expand into the generic XMM fcmov rule rather than encoding directly.
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13549
13550
13551 // ============================================================================
13552 // Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  // pre/post_call_FPU bracket the call for FPU state; call_epilog handles the
  // return-value bookkeeping.
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  // Alignment so the return address lands where ret_addr_offset() expects.
  ins_alignment(4);
%}
13570
// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // EAX is pre-loaded with a sentinel oop before the call, per the format.
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  // Alignment so the return address lands where ret_addr_offset() expects.
  ins_alignment(4);
%}
13589
// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  // The x87 stack must be empty across a runtime call, hence
  // FFree_Float_Stack_All before transferring to the runtime.
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}
13605
// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Leaf calls skip call_epilog (no safepoint/deopt bookkeeping) but still
  // clear the x87 stack and verify FPU state on return.
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}
13620
// Leaf runtime call that is known not to touch floating point:
// no x87 stack clearing or FPU state verification is emitted.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}
13631
13632
// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  // 0xC3 = near RET; emitted as the single opcode byte.
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}
13642
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
  match(TailCall jump_target method_ptr);
  ins_cost(300);
  // method_ptr is pinned to EBX by its operand class; the callee expects it there.
  format %{ "JMP $jump_target \t# EBX holds method" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}
13655
13656
// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // The return address is popped into EDX (a dummy) before the indirect jump;
  // the exception oop is pinned to EAX by its operand class.
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}
13669
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to  jumping to this handler.  No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  // Zero-size: purely informs the register allocator that EAX holds the oop.
  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}
13683
13684
// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP    rethrow_stub" %}
  // enc_rethrow emits the jump to the shared rethrow stub.
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}
13697
13698 // inlined locking and unlocking
13699
// Fast-path monitor enter using RTM (restricted transactional memory).
// Selected only when the compilation enables RTM.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  // box is destroyed; tmp (EAX), scr (EDX), cx1, cx2 are scratch; thread is
  // a TEMP loaded below via get_thread.
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    // 32-bit has no dedicated thread register; materialize it first.
    __ get_thread($thread$$Register);
    // 'true' enables the RTM path; profiling data feeds RTM heuristics.
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}
13716
// Fast-path monitor enter (non-RTM, non-lightweight locking modes).
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
  predicate(LockingMode != LM_LIGHTWEIGHT && !Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  // box is destroyed; tmp (EAX) and scr are scratch; thread is loaded below.
  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    // 32-bit has no dedicated thread register; materialize it first.
    __ get_thread($thread$$Register);
    // noreg/NULL/false arguments disable the RTM-specific machinery.
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, $thread$$Register, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}
13730
// Fast-path monitor exit (non-lightweight locking modes).
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object box));
  // box (pinned to EAX) is destroyed; tmp is scratch.
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}
13742
// Fast-path monitor enter for the lightweight locking mode.
instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  // box is destroyed; eax_reg (pinned to EAX) and tmp are scratch;
  // thread is loaded below via get_thread.
  effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
  ins_encode %{
    // 32-bit has no dedicated thread register; materialize it first.
    __ get_thread($thread$$Register);
    __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}
13755
// Fast-path monitor exit for the lightweight locking mode.
// Note: the second FastUnlock input is the EAX-pinned operand here.
instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object eax_reg));
  // eax_reg is destroyed; tmp is scratch; thread is loaded below.
  effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
  ins_encode %{
    // 32-bit has no dedicated thread register; materialize it first.
    __ get_thread($thread$$Register);
    __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}
13768
13769 // ============================================================================
13770 // Safepoint Instruction
// Safepoint poll: TEST EAX against the per-thread polling page address.
// A pending safepoint makes the page unreadable, so the load faults and the
// signal handler routes the thread to the safepoint.
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    // Relocation marks this PC as a poll site for the signal handler.
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();
    // 0x85 is TEST r/m32,r32 — verifies the expected 2-byte encoding was emitted.
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
13788
13789
13790 // ============================================================================
13791 // This name is KNOWN by the ADLC and cannot be changed.
13792 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13793 // for this guy.
13794 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13795 match(Set dst (ThreadLocal));
13796 effect(DEF dst, KILL cr);
13797
13798 format %{ "MOV $dst, Thread::current()" %}
13799 ins_encode %{
13800 Register dstReg = as_Register($dst$$reg);
13801 __ get_thread(dstReg);
13802 %}
13803 ins_pipe( ialu_reg_fat );
13804 %}
13805
13806
13807
13808 //----------PEEPHOLE RULES-----------------------------------------------------
13809 // These must follow all instruction definitions as they use the names
13810 // defined in the instructions definitions.
13811 //
13812 // peepmatch ( root_instr_name [preceding_instruction]* );
13813 //
13814 // peepconstraint %{
13815 // (instruction_number.operand_name relational_op instruction_number.operand_name
13816 // [, ...] );
13817 // // instruction numbers are zero-based using left to right order in peepmatch
13818 //
13819 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
13820 // // provide an instruction_number.operand_name for each operand that appears
13821 // // in the replacement instruction's match rule
13822 //
13823 // ---------VM FLAGS---------------------------------------------------------
13824 //
13825 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13826 //
13827 // Each peephole rule is given an identifying number starting with zero and
13828 // increasing by one in the order seen by the parser. An individual peephole
13829 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13830 // on the command-line.
13831 //
13832 // ---------CURRENT LIMITATIONS----------------------------------------------
13833 //
13834 // Only match adjacent instructions in same basic block
13835 // Only equality constraints
13836 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13837 // Only one replacement instruction
13838 //
13839 // ---------EXAMPLE----------------------------------------------------------
13840 //
13841 // // pertinent parts of existing instructions in architecture description
13842 // instruct movI(rRegI dst, rRegI src) %{
13843 // match(Set dst (CopyI src));
13844 // %}
13845 //
13846 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13847 // match(Set dst (AddI dst src));
13848 // effect(KILL cr);
13849 // %}
13850 //
13851 // // Change (inc mov) to lea
13852 // peephole %{
// // increment preceded by register-register move
13854 // peepmatch ( incI_eReg movI );
13855 // // require that the destination register of the increment
13856 // // match the destination register of the move
13857 // peepconstraint ( 0.dst == 1.dst );
13858 // // construct a replacement instruction that sets
13859 // // the destination to ( move's source register + one )
13860 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13861 // %}
13862 //
13863 // Implementation no longer uses movX instructions since
13864 // machine-independent system no longer uses CopyX nodes.
13865 //
13866 // peephole %{
13867 // peepmatch ( incI_eReg movI );
13868 // peepconstraint ( 0.dst == 1.dst );
13869 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13870 // %}
13871 //
13872 // peephole %{
13873 // peepmatch ( decI_eReg movI );
13874 // peepconstraint ( 0.dst == 1.dst );
13875 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13876 // %}
13877 //
13878 // peephole %{
13879 // peepmatch ( addI_eReg_imm movI );
13880 // peepconstraint ( 0.dst == 1.dst );
13881 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13882 // %}
13883 //
13884 // peephole %{
13885 // peepmatch ( addP_eReg_imm movP );
13886 // peepconstraint ( 0.dst == 1.dst );
13887 // peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13888 // %}
13889
13890 // // Change load of spilled value to only a spill
13891 // instruct storeI(memory mem, rRegI src) %{
13892 // match(Set mem (StoreI mem src));
13893 // %}
13894 //
13895 // instruct loadI(rRegI dst, memory mem) %{
13896 // match(Set dst (LoadI mem));
13897 // %}
13898 //
// Eliminate a load that re-reads a value just stored: if the preceding storeI
// wrote 1.src to 1.mem and the loadI reads the same address into that same
// register (1.src == 0.dst), the load is redundant — keep only the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}
13904
13905 //----------SMARTSPILL RULES---------------------------------------------------
13906 // These must follow all instruction definitions as they use the names
13907 // defined in the instructions definitions.