1 //
2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
61
62 // General Registers
63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
66
// reg_def NAME(register save type, C-convention save type, ideal type, encoding, VMReg).
// The encoding column is the bit pattern placed into ModRM/SIB bytes (see comment above).
67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
75 reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());
76
77 // Float registers. We treat TOS/FPR0 special. It is invisible to the
78 // allocator, and only shows up in the encodings.
79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
81 // Ok so here's the trick FPR1 is really st(0) except in the midst
82 // of emission of assembly for a machnode. During the emission the fpu stack
83 // is pushed making FPR1 == st(1) temporarily. However at any safepoint
84 // the stack will not have this element so FPR1 == st(0) from the
85 // oopMap viewpoint. This same weirdness with numbering causes
86 // instruction encoding to have to play games with the register
87 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
88 // where it does flt->flt moves to see an example
89 //
// Each x87 register is modeled as two 32-bit halves, L and H (note ->next()
// for the H half), so a double occupies an adjacent L/H pair.
90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
104 //
105 // Empty fill registers, which are never used, but supply alignment to xmm regs
106 //
107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
115
116 // Specify priority of register selection within phases of register
117 // allocation. Highest priority is first. A useful heuristic is to
118 // give registers a low priority when they are required by machine
119 // instructions, like EAX and EDX. Registers which are used as
120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
121 // For the Intel integer registers, the equivalent Long pairs are
122 // EDX:EAX, EBX:ECX, and EDI:EBP.
// chunk0 enumerates every register known to the allocator; FILL0-FILL7 are
// never allocated (see above) and exist only to pad/align the chunk.
123 alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
124 FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
125 FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
126 FPR6L, FPR6H, FPR7L, FPR7H,
127 FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
128
129
130 //----------Architecture Description Register Classes--------------------------
131 // Several register classes are automatically defined based upon information in
132 // this architecture description.
133 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
135 //
136 // Class for no registers (empty set).
137 reg_class no_reg();
138
139 // Class for all registers
140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
141 // Class for all registers (excluding EBP)
142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
143 // Dynamic register class that selects at runtime between register classes
144 // any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
145 // Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
// Note: for all reg_class_dynamic definitions below, the FIRST argument is
// chosen when the runtime condition is true, the second when it is false.
146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
147
148 // Class for general registers
149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
150 // Class for general registers (excluding EBP).
151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
152 // Used also if the PreserveFramePointer flag is true.
153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
156
157 // Class of "X" registers
158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
159
160 // Class of registers that can appear in an address with no offset.
161 // EBP and ESP require an extra instruction byte for zero offset.
162 // Used in fast-unlock
163 reg_class p_reg(EDX, EDI, ESI, EBX);
164
165 // Class for general registers excluding ECX
166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
167 // Class for general registers excluding ECX (and EBP)
168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
171
172 // Class for general registers excluding EAX
173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
174
175 // Class for general registers excluding EAX and EBX.
176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
177 // Class for general registers excluding EAX and EBX (and EBP)
178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
181
182 // Class of EAX (for multiply and divide operations)
183 reg_class eax_reg(EAX);
184
185 // Class of EBX (for atomic add)
186 reg_class ebx_reg(EBX);
187
188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
189 reg_class ecx_reg(ECX);
190
191 // Class of EDX (for multiply and divide operations)
192 reg_class edx_reg(EDX);
193
194 // Class of EDI (for synchronization)
195 reg_class edi_reg(EDI);
196
197 // Class of ESI (for synchronization)
198 reg_class esi_reg(ESI);
199
200 // Singleton class for stack pointer
201 reg_class sp_reg(ESP);
202
203 // Singleton class for instruction pointer
204 // reg_class ip_reg(EIP);
205
206 // Class of integer register pairs
207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
208 // Class of integer register pairs (excluding EBP and EDI);
209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
212
213 // Class of integer register pairs that aligns with calling convention
214 reg_class eadx_reg( EAX,EDX );
215 reg_class ebcx_reg( ECX,EBX );
216 reg_class ebpd_reg( EBP,EDI );
217
218 // Not AX or DX, used in divides
219 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
220 // Not AX or DX (and neither EBP), used in divides
221 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
222 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
223 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
224
225 // Floating point registers. Notice FPR0 is not a choice.
226 // FPR0 is not ever allocated; we use clever encodings to fake
227 // a 2-address instructions out of Intels FP stack.
228 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
229
// Doubles use the L/H pairs defined above; singles use only the L halves.
230 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
231 FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
232 FPR7L,FPR7H );
233
234 reg_class fp_flt_reg0( FPR1L );
235 reg_class fp_dbl_reg0( FPR1L,FPR1H );
236 reg_class fp_dbl_reg1( FPR2L,FPR2H );
237 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
238 FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
239
240 %}
241
242
243 //----------SOURCE BLOCK-------------------------------------------------------
244 // This is a block of C++ code which provides values, functions, and
245 // definitions necessary in the rest of the architecture description
246 source_hpp %{
247 // Must be visible to the DFA in dfa_x86_32.cpp
// NOTE(review): per its name, reports whether a long operand's upper 32 bits
// are statically zero — confirm at the definition (not visible in this chunk).
248 extern bool is_operand_hi32_zero(Node* n);
249 %}
250
251 source %{
// Relocation formats used when emitting 32-bit immediates/displacements below.
252 #define RELOC_IMM32 Assembler::imm_operand
253 #define RELOC_DISP32 Assembler::disp32_operand
254
255 #define __ _masm.
256
257 // How to find the high register of a Long pair, given the low register
// Works because the long pairs are allocated with encodings (low, low+2):
// EDX:EAX = (0,2), EBX:ECX = (1,3), EDI:EBP = (5,7) -- see the reg_defs above.
258 #define HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
259 #define HIGH_FROM_LOW_ENC(x) ((x)+2)
260
261 // These masks are used to provide 128-bit aligned bitmasks to the XMM
262 // instructions, to allow sign-masking or sign-bit flipping. They allow
263 // fast versions of NegF/NegD and AbsF/AbsD.
264
// No runtime register-mask setup is performed on x86_32 (body is empty).
265 void reg_mask_init() {}
266
267 // Note: 'double' and 'long long' have 32-bits alignment on x86.
// Rounds 'adr' DOWN to the nearest 16-byte boundary and stores the 128-bit
// value {lo, hi} there, returning the aligned address. The caller must supply
// a pointer with at least 16 bytes of slack before it inside the same buffer
// (see fp_signmask_pool below, which over-allocates one extra 128-bit slot).
268 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
269 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
270 // of 128-bits operands for SSE instructions.
271 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
272 // Store the value to a 128-bits operand.
273 operand[0] = lo;
274 operand[1] = hi;
275 return operand;
276 }
277
278 // Buffer for 128-bits masks used by SSE instructions.
279 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
280
281 // Static initialization during VM startup.
// The start indices 1*2, 2*2, 3*2, 4*2 are each 16 bytes apart, so after
// double_quadword() rounds down, the four masks occupy distinct aligned
// 128-bit slots within the pool; the extra slot absorbs the alignment slack.
282 static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
283 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
284 static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
285 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
286
287 // Offset hacking within calls.
// Number of code bytes emitted immediately before a call by the pre-call
// reset sequence: a 6-byte FLDCW when the method runs in 24-bit FP precision
// mode, plus a 3-byte VZEROUPPER when the CPU supports it. Used by the
// ret_addr_offset()/compute_padding() functions below, which must agree
// byte-for-byte with what is actually emitted.
288 static int pre_call_resets_size() {
289 int size = 0;
290 Compile* C = Compile::current();
291 if (C->in_24_bit_fp_mode()) {
292 size += 6; // fldcw
293 }
294 if (VM_Version::supports_vzeroupper()) {
295 size += 3; // vzeroupper
296 }
297 return size;
298 }
299
300 // !!!!! Special hack to get all type of calls to specify the byte offset
301 // from the start of the call to the point where the return address
302 // will point.
// Static call: optional pre-call resets, then a 5-byte call (1 opcode byte +
// 4-byte displacement — cf. compute_padding below).
303 int MachCallStaticJavaNode::ret_addr_offset() {
304 return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
305 }
306
// Dynamic call: as above plus the 5-byte MOV that loads the inline-cache
// value (cf. CallDynamicJavaDirectNode::compute_padding below).
307 int MachCallDynamicJavaNode::ret_addr_offset() {
308 return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
309 }
310
// Size in bytes of the FFree_Float_Stack_All sequence; -1 until it has been
// emitted (elsewhere in this file), as the assert below checks.
311 static int sizeof_FFree_Float_Stack_All = -1;
312
// Runtime call: 5-byte call plus pre-call resets, plus the FPU-stack-clearing
// sequence unless this is a leaf call that does not touch the FPU.
313 int MachCallRuntimeNode::ret_addr_offset() {
314 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
315 return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
316 }
317
318 //
319 // Compute padding required for nodes which need alignment
320 //
321
322 // The address of the call instruction needs to be 4-byte aligned to
323 // ensure that it does not span a cache line so that it can be patched.
// Returns the number of pad bytes needed so that, after the optional pre-call
// resets and the 1-byte call opcode, the 4-byte displacement is 4-byte aligned.
324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
325 current_offset += pre_call_resets_size(); // skip fldcw, if any
326 current_offset += 1; // skip call opcode byte
327 return align_up(current_offset, alignment_required()) - current_offset;
328 }
329
330 // The address of the call instruction needs to be 4-byte aligned to
331 // ensure that it does not span a cache line so that it can be patched.
// Same as the static case, but also skips the 5-byte MOV that precedes the
// call (it loads the inline-cache value; see ret_addr_offset above).
332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
333 current_offset += pre_call_resets_size(); // skip fldcw, if any
334 current_offset += 5; // skip MOV instruction
335 current_offset += 1; // skip call opcode byte
336 return align_up(current_offset, alignment_required()) - current_offset;
337 }
338
339 // EMIT_RM()
// Emit a ModRM (or SIB) byte: f1 = mod (2 bits), f2 = reg (3 bits),
// f3 = r/m (3 bits) — for a SIB byte the same layout is scale/index/base.
340 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
341 unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
342 cbuf.insts()->emit_int8(c);
343 }
344
345 // EMIT_CC()
// Emit an opcode byte formed by OR-ing a base opcode (f1) with condition bits (f2).
346 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
347 unsigned char c = (unsigned char)( f1 | f2 );
348 cbuf.insts()->emit_int8(c);
349 }
350
351 // EMIT_OPCODE()
// Emit a single opcode byte.
352 void emit_opcode(CodeBuffer &cbuf, int code) {
353 cbuf.insts()->emit_int8((unsigned char) code);
354 }
355
356 // EMIT_OPCODE() w/ relocation information
// Record a relocation at insts_mark()+offset, then emit the opcode byte.
357 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
358 cbuf.relocate(cbuf.insts_mark() + offset, reloc);
359 emit_opcode(cbuf, code);
360 }
361
362 // EMIT_D8()
// Emit an 8-bit datum (e.g. a disp8).
363 void emit_d8(CodeBuffer &cbuf, int d8) {
364 cbuf.insts()->emit_int8((unsigned char) d8);
365 }
366
367 // EMIT_D16()
// Emit a 16-bit datum.
368 void emit_d16(CodeBuffer &cbuf, int d16) {
369 cbuf.insts()->emit_int16(d16);
370 }
371
372 // EMIT_D32()
// Emit a 32-bit datum (imm32/disp32).
373 void emit_d32(CodeBuffer &cbuf, int d32) {
374 cbuf.insts()->emit_int32(d32);
375 }
376
377 // emit 32 bit value and construct relocation entry from relocInfo::relocType
// The relocation is recorded at insts_mark() (the start of the current
// instruction), then the 32-bit value is emitted.
378 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
379 int format) {
380 cbuf.relocate(cbuf.insts_mark(), reloc, format);
381 cbuf.insts()->emit_int32(d32);
382 }
383
384 // emit 32 bit value and construct relocation entry from RelocationHolder
// Debug builds additionally verify that an embedded oop value really is a
// valid oop (0 and the non-oop sentinel are exempt).
385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
386 int format) {
387 #ifdef ASSERT
388 if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
389 assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
390 }
391 #endif
392 cbuf.relocate(cbuf.insts_mark(), rspec, format);
393 cbuf.insts()->emit_int32(d32);
394 }
395
396 // Access stack slot for load or store
// Emits opcode + [ESP+disp] addressing. An ESP base always requires a SIB
// byte, so the second emit_rm() call below actually emits the SIB
// (scale=0, index=ESP-code meaning "none", base=ESP). disp8 vs disp32 form
// is chosen by the displacement's range.
397 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
398 emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
399 if( -128 <= disp && disp <= 127 ) {
400 emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
401 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
402 emit_d8 (cbuf, disp); // Displacement // R/M byte
403 } else {
404 emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
405 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
406 emit_d32(cbuf, disp); // Displacement // R/M byte
407 }
408 }
409
410 // rRegI ereg, memory mem) %{ // emit_reg_mem
// Emit the ModRM/SIB/displacement bytes for a register+memory operand.
// index == 0x4 is the "no index" code (mirrors the SIB index encoding);
// base == -1 flags an absolute 32-bit address; disp_reloc != none forces the
// disp32 form so the displacement can be relocated.
411 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
412 // There is no index & no scale, use form without SIB byte
413 if ((index == 0x4) &&
414 (scale == 0) && (base != ESP_enc)) {
415 // If no displacement, mode is 0x0; unless base is [EBP]
// (mod=00 with r/m=EBP means [disp32] absolute, so EBP must take the disp8 form)
416 if ( (displace == 0) && (base != EBP_enc) ) {
417 emit_rm(cbuf, 0x0, reg_encoding, base);
418 }
419 else { // If 8-bit displacement, mode 0x1
420 if ((displace >= -128) && (displace <= 127)
421 && (disp_reloc == relocInfo::none) ) {
422 emit_rm(cbuf, 0x1, reg_encoding, base);
423 emit_d8(cbuf, displace);
424 }
425 else { // If 32-bit displacement
426 if (base == -1) { // Special flag for absolute address
// mod=00, r/m=101 encodes [disp32] with no base register
427 emit_rm(cbuf, 0x0, reg_encoding, 0x5);
428 // (manual lies; no SIB needed here)
429 if ( disp_reloc != relocInfo::none ) {
430 emit_d32_reloc(cbuf, displace, disp_reloc, 1);
431 } else {
432 emit_d32 (cbuf, displace);
433 }
434 }
435 else { // Normal base + offset
436 emit_rm(cbuf, 0x2, reg_encoding, base);
437 if ( disp_reloc != relocInfo::none ) {
438 emit_d32_reloc(cbuf, displace, disp_reloc, 1);
439 } else {
440 emit_d32 (cbuf, displace);
441 }
442 }
443 }
444 }
445 }
446 else { // Else, encode with the SIB byte
447 // If no displacement, mode is 0x0; unless base is [EBP]
448 if (displace == 0 && (base != EBP_enc)) { // If no displacement
449 emit_rm(cbuf, 0x0, reg_encoding, 0x4);
450 emit_rm(cbuf, scale, index, base);
451 }
452 else { // If 8-bit displacement, mode 0x1
453 if ((displace >= -128) && (displace <= 127)
454 && (disp_reloc == relocInfo::none) ) {
455 emit_rm(cbuf, 0x1, reg_encoding, 0x4);
456 emit_rm(cbuf, scale, index, base);
457 emit_d8(cbuf, displace);
458 }
459 else { // If 32-bit displacement
// (both arms below emit identical bytes when base == 0x04; the special
// case is kept for clarity of the ESP-base situation)
460 if (base == 0x04 ) {
461 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
462 emit_rm(cbuf, scale, index, 0x04);
463 } else {
464 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
465 emit_rm(cbuf, scale, index, base);
466 }
467 if ( disp_reloc != relocInfo::none ) {
468 emit_d32_reloc(cbuf, displace, disp_reloc, 1);
469 } else {
470 emit_d32 (cbuf, displace);
471 }
472 }
473 }
474 }
475 }
476
477
// Emit a register-to-register move (0x8B = MOV r32, r/m32 with mod=11,
// i.e. register-direct). A self-copy emits nothing.
478 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
479 if( dst_encoding == src_encoding ) {
480 // reg-reg copy, use an empty encoding
481 } else {
482 emit_opcode( cbuf, 0x8B );
483 emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
484 }
485 }
486
// After an unordered FP compare (comiss/ucomiss), rewrite the flags for NaN
// operands so that the compare reads as 'less than' (see the flag diagram
// below). Skipped entirely (jccb over) when PF is clear, i.e. not NaN.
487 void emit_cmpfp_fixup(MacroAssembler& _masm) {
488 Label exit;
489 __ jccb(Assembler::noParity, exit);
490 __ pushf();
491 //
492 // comiss/ucomiss instructions set ZF,PF,CF flags and
493 // zero OF,AF,SF for NaN values.
494 // Fixup flags by zeroing ZF,PF so that compare of NaN
495 // values returns 'less than' result (CF is set).
496 // Leave the rest of flags unchanged.
497 //
498 // 7 6 5 4 3 2 1 0
499 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
500 // 0 0 1 0 1 0 1 1 (0x2B)
501 //
502 __ andl(Address(rsp, 0), 0xffffff2b);
503 __ popf();
504 __ bind(exit);
505 }
506
// Materialize a three-way FP compare result in 'dst' from the flags:
// -1 for less-than or unordered (PF set), 0 for equal, 1 for greater-than.
507 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
508 Label done;
509 __ movl(dst, -1);
510 __ jcc(Assembler::parity, done);
511 __ jcc(Assembler::below, done);
512 __ setb(Assembler::notEqual, dst);
513 __ movzbl(dst, dst);
514 __ bind(done);
515 }
516
517
518 //=============================================================================
// On x86_32 the constant table is reached with absolute addressing, so the
// constant-base node needs no register, emits no code, and has size 0.
519 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
520
521 int ConstantTable::calculate_table_base_offset() const {
522 return 0; // absolute addressing, no offset
523 }
524
525 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
526 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
527 ShouldNotReachHere();
528 }
529
530 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
531 // Empty encoding
532 }
533
534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
535 return 0;
536 }
537
538 #ifndef PRODUCT
539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
540 st->print("# MachConstantBaseNode (empty encoding)");
541 }
542 #endif
543
544
545 //=============================================================================
546 #ifndef PRODUCT
// Debug-listing printer for the method prolog. The framesize bookkeeping
// (subtracting wordSize per pushed slot) tracks what the listed instructions
// would consume, matching the real emission in MachPrologNode::emit below.
547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
548 Compile* C = ra_->C;
549
550 int framesize = C->output()->frame_size_in_bytes();
551 int bangsize = C->output()->bang_size_in_bytes();
552 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
553 // Remove wordSize for return addr which is already pushed.
554 framesize -= wordSize;
555
// Bang variant: PUSH EBP then SUB ESP.
556 if (C->output()->need_stack_bang(bangsize)) {
557 framesize -= wordSize;
558 st->print("# stack bang (%d bytes)", bangsize);
559 st->print("\n\t");
560 st->print("PUSH EBP\t# Save EBP");
561 if (PreserveFramePointer) {
562 st->print("\n\t");
563 st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
564 }
565 if (framesize) {
566 st->print("\n\t");
567 st->print("SUB ESP, #%d\t# Create frame",framesize);
568 }
// No-bang variant: SUB ESP first, then store EBP into its frame slot.
569 } else {
570 st->print("SUB ESP, #%d\t# Create frame",framesize);
571 st->print("\n\t");
572 framesize -= wordSize;
573 st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
574 if (PreserveFramePointer) {
575 st->print("\n\t");
576 st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
577 if (framesize > 0) {
578 st->print("\n\t");
579 st->print("ADD EBP, #%d", framesize);
580 }
581 }
582 }
583
584 if (VerifyStackAtCalls) {
585 st->print("\n\t");
586 framesize -= wordSize;
587 st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
588 }
589
590 if( C->in_24_bit_fp_mode() ) {
591 st->print("\n\t");
592 st->print("FLDCW \t# load 24 bit fpu control word");
593 }
594 if (UseSSE >= 2 && VerifyFPU) {
595 st->print("\n\t");
596 st->print("# verify FPU stack (must be clean on entry)");
597 }
598
599 #ifdef ASSERT
600 if (VerifyStackAtCalls) {
601 st->print("\n\t");
602 st->print("# stack alignment check");
603 }
604 #endif
605 st->cr();
606 }
607 #endif
608
609
// Emit the method prolog. The actual frame construction (stack bang, pushes,
// SUB ESP, 24-bit FP mode setup) is delegated to MacroAssembler::verified_entry.
610 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
611 Compile* C = ra_->C;
612 C2_MacroAssembler _masm(&cbuf);
613
614 int framesize = C->output()->frame_size_in_bytes();
615 int bangsize = C->output()->bang_size_in_bytes();
616
617 __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
618
// The frame is fully constructed from this point on (relevant for oopmaps/deopt).
619 C->output()->set_frame_complete(cbuf.insts_size());
620
621 if (C->has_mach_constant_base_node()) {
622 // NOTE: We set the table base offset here because users might be
623 // emitted before MachConstantBaseNode.
624 ConstantTable& constant_table = C->output()->constant_table();
625 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
626 }
627 }
628
629 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
630 return MachNode::size(ra_); // too many variables; just compute it the hard way
631 }
632
633 int MachPrologNode::reloc() const {
634 return 0; // a large enough number
635 }
636
637 //=============================================================================
638 #ifndef PRODUCT
// Debug-listing printer for the method epilog; mirrors MachEpilogNode::emit below.
639 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
640 Compile *C = ra_->C;
641 int framesize = C->output()->frame_size_in_bytes();
642 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
643 // Remove two words for return addr and rbp,
644 framesize -= 2*wordSize;
645
646 if (C->max_vector_size() > 16) {
647 st->print("VZEROUPPER");
648 st->cr(); st->print("\t");
649 }
650 if (C->in_24_bit_fp_mode()) {
651 st->print("FLDCW standard control word");
652 st->cr(); st->print("\t");
653 }
654 if (framesize) {
655 st->print("ADD ESP,%d\t# Destroy frame",framesize);
656 st->cr(); st->print("\t");
657 }
658 st->print_cr("POPL EBP"); st->print("\t");
659 if (do_polling() && C->is_method_compilation()) {
660 st->print("CMPL rsp, poll_offset[thread] \n\t"
661 "JA #safepoint_stub\t"
662 "# Safepoint: poll for GC");
663 }
664 }
665 #endif
666
// Emit the method epilog: optional VZEROUPPER, restore the standard FPU
// control word if needed, destroy the frame (ADD ESP), POP EBP, reserved
// stack check, and the return-point safepoint poll.
667 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
668 Compile *C = ra_->C;
669 MacroAssembler _masm(&cbuf);
670
671 if (C->max_vector_size() > 16) {
672 // Clear upper bits of YMM registers when current compiled code uses
673 // wide vectors to avoid AVX <-> SSE transition penalty during call.
674 _masm.vzeroupper();
675 }
676 // If method set FPU control word, restore to standard control word
677 if (C->in_24_bit_fp_mode()) {
678 _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
679 }
680
681 int framesize = C->output()->frame_size_in_bytes();
682 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
683 // Remove two words for return addr and rbp,
684 framesize -= 2*wordSize;
685
686 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
687
// ADD ESP, #framesize: imm8 form (0x83, sign-extended) only covers 0..127,
// so the imm32 form (0x81) is used for framesize >= 128.
688 if (framesize >= 128) {
689 emit_opcode(cbuf, 0x81); // add SP, #framesize
690 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
691 emit_d32(cbuf, framesize);
692 } else if (framesize) {
693 emit_opcode(cbuf, 0x83); // add SP, #framesize
694 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
695 emit_d8(cbuf, framesize);
696 }
697
// 0x58+reg = POP r32; this is the "POPL EBP" shown by format() above.
698 emit_opcode(cbuf, 0x58 | EBP_enc);
699
700 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
701 __ reserved_stack_check();
702 }
703
704 if (do_polling() && C->is_method_compilation()) {
705 Register thread = as_Register(EBX_enc);
706 MacroAssembler masm(&cbuf);
707 __ get_thread(thread);
708 Label dummy_label;
709 Label* code_stub = &dummy_label;
// During scratch-emit sizing no real stub is created; a dummy label stands in.
710 if (!C->output()->in_scratch_emit_size()) {
711 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
712 C->output()->add_stub(stub);
713 code_stub = &stub->entry();
714 }
715 __ relocate(relocInfo::poll_return_type);
716 __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
717 }
718 }
719
// Epilog size varies with flags/frame size, so measure instead of computing.
720 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
721 return MachNode::size(ra_); // too many variables; just compute it
722 // the hard way
723 }
724
725 int MachEpilogNode::reloc() const {
726 return 0; // a large enough number
727 }
728
729 const Pipeline * MachEpilogNode::pipeline() const {
730 return MachNode::pipeline_class();
731 }
732
733 //=============================================================================
734
// Register-class buckets used by the spill-copy code to pick a move strategy.
735 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
// Classify an allocator register: invalid, stack slot, general-purpose,
// x87 float (only legal below SSE2, per the assert), opmask, or XMM.
736 static enum RC rc_class( OptoReg::Name reg ) {
737
738 if( !OptoReg::is_valid(reg) ) return rc_bad;
739 if (OptoReg::is_stack(reg)) return rc_stack;
740
741 VMReg r = OptoReg::as_VMReg(reg);
742 if (r->is_Register()) return rc_int;
743 if (r->is_FloatRegister()) {
744 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
745 return rc_float;
746 }
747 if (r->is_KRegister()) return rc_kreg;
748 assert(r->is_XMMRegister(), "must be");
749 return rc_xmm;
750 }
751
// Emit (or just print / size) a single reg<->[ESP+offset] move for spills.
// With cbuf: emits opcode + [ESP+offset] operand. Without cbuf and !do_size:
// prints the listing form. Always returns the accumulated code size:
// 3 bytes (opcode+ModRM+SIB) plus 0/1/4 displacement bytes.
// NOTE(review): the size formula treats offset <= 127 as disp8 — assumes
// stack offsets are non-negative; confirm callers never pass offset < -128.
752 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
753 int opcode, const char *op_str, int size, outputStream* st ) {
754 if( cbuf ) {
755 emit_opcode (*cbuf, opcode );
756 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
757 #ifndef PRODUCT
758 } else if( !do_size ) {
759 if( size != 0 ) st->print("\n\t");
760 if( opcode == 0x8B || opcode == 0x89 ) { // MOV
761 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
762 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
763 } else { // FLD, FST, PUSH, POP
764 st->print("%s [ESP + #%d]",op_str,offset);
765 }
766 #endif
767 }
768 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
769 return size+3+offset_size;
770 }
771
772 // Helper for XMM registers. Extra opcode bits, limited syntax.
// Spill move between an XMM register and [ESP+offset]. A double is detected
// by the hi half immediately following the lo half (reg_lo+1 == reg_hi);
// otherwise a single-precision move is used. Returns the accumulated size,
// accounting for EVEX compressed disp8 when AVX-512 is in use.
773 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
774 int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
775 int in_size_in_bits = Assembler::EVEX_32bit;
776 int evex_encoding = 0;
777 if (reg_lo+1 == reg_hi) {
778 in_size_in_bits = Assembler::EVEX_64bit;
779 evex_encoding = Assembler::VEX_W;
780 }
781 if (cbuf) {
782 MacroAssembler _masm(cbuf);
783 // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
784 // it maps more cases to single byte displacement
785 _masm.set_managed();
786 if (reg_lo+1 == reg_hi) { // double move?
787 if (is_load) {
788 __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
789 } else {
790 __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
791 }
792 } else {
793 if (is_load) {
794 __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
795 } else {
796 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
797 }
798 }
799 #ifndef PRODUCT
800 } else if (!do_size) {
801 if (size != 0) st->print("\n\t");
802 if (reg_lo+1 == reg_hi) { // double move?
803 if (is_load) st->print("%s %s,[ESP + #%d]",
804 UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
805 Matcher::regName[reg_lo], offset);
806 else st->print("MOVSD [ESP + #%d],%s",
807 offset, Matcher::regName[reg_lo]);
808 } else {
809 if (is_load) st->print("MOVSS %s,[ESP + #%d]",
810 Matcher::regName[reg_lo], offset);
811 else st->print("MOVSS [ESP + #%d],%s",
812 offset, Matcher::regName[reg_lo]);
813 }
814 #endif
815 }
// Size accounting: under AVX-512 a displacement may compress to one byte
// even when > 127 (EVEX disp8*N), so query the assembler for that case.
816 bool is_single_byte = false;
817 if ((UseAVX > 2) && (offset != 0)) {
818 is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
819 }
820 int offset_size = 0;
821 if (UseAVX > 2 ) {
822 offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
823 } else {
824 offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
825 }
826 size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
827 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
828 return size+5+offset_size;
829 }
830
831
// XMM -> XMM register spill copy.  Adjacent register pairs on both sides
// (lo+1 == hi) move 64 bits (double), otherwise 32 bits (float).
// Returns the running byte size including this move.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}
870
// 32-bit integer register -> XMM register spill copy via MOVD.
// NOTE: unlike the other helpers, the incoming running 'size' is not
// accumulated — callers return this helper's result directly.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  // EVEX needs two extra prefix bytes over the SSE/VEX encoding.
  return (UseAVX> 2) ? 6 : 4;
}
887
888
// XMM register -> 32-bit integer register spill copy via MOVD.
// NOTE: the incoming running 'size' is not accumulated — callers return
// this helper's result directly.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  // EVEX needs two extra prefix bytes over the SSE/VEX encoding.
  return (UseAVX> 2) ? 6 : 4;
}
905
// Integer register -> integer register copy: MOV r32, r/m32 (opcode 0x8B
// plus a ModRM byte), i.e. exactly 2 bytes added to the running 'size'.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}
918
// Store an x87 float register to the stack slot [ESP + offset].  If the
// source is not already at the top of the FP stack (FPR1L), it is first
// duplicated there with FLD ST(i) and then stored with a popping FSTP;
// otherwise a non-popping FST is used.  st_op carries a register number
// whose encoding doubles as the ModRM reg-field opcode extension for
// impl_helper (store-and-pop vs. plain store).  Returns the accumulated
// byte size.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;                     // FLD ST(i) is 2 bytes
  }

  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                         // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}
948
949 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
950 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
951 int src_hi, int dst_hi, uint ireg, outputStream* st);
952
953 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
954 int stack_offset, int reg, uint ireg, outputStream* st);
955
956 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
957 int dst_offset, uint ireg, outputStream* st) {
958 if (cbuf) {
959 MacroAssembler _masm(cbuf);
960 switch (ireg) {
961 case Op_VecS:
962 __ pushl(Address(rsp, src_offset));
963 __ popl (Address(rsp, dst_offset));
964 break;
965 case Op_VecD:
966 __ pushl(Address(rsp, src_offset));
967 __ popl (Address(rsp, dst_offset));
968 __ pushl(Address(rsp, src_offset+4));
969 __ popl (Address(rsp, dst_offset+4));
970 break;
971 case Op_VecX:
972 __ movdqu(Address(rsp, -16), xmm0);
973 __ movdqu(xmm0, Address(rsp, src_offset));
974 __ movdqu(Address(rsp, dst_offset), xmm0);
975 __ movdqu(xmm0, Address(rsp, -16));
976 break;
977 case Op_VecY:
978 __ vmovdqu(Address(rsp, -32), xmm0);
979 __ vmovdqu(xmm0, Address(rsp, src_offset));
980 __ vmovdqu(Address(rsp, dst_offset), xmm0);
981 __ vmovdqu(xmm0, Address(rsp, -32));
982 break;
983 case Op_VecZ:
984 __ evmovdquq(Address(rsp, -64), xmm0, 2);
985 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
986 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
987 __ evmovdquq(xmm0, Address(rsp, -64), 2);
988 break;
989 default:
990 ShouldNotReachHere();
991 }
992 #ifndef PRODUCT
993 } else {
994 switch (ireg) {
995 case Op_VecS:
996 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
997 "popl [rsp + #%d]",
998 src_offset, dst_offset);
999 break;
1000 case Op_VecD:
1001 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1002 "popq [rsp + #%d]\n\t"
1003 "pushl [rsp + #%d]\n\t"
1004 "popq [rsp + #%d]",
1005 src_offset, dst_offset, src_offset+4, dst_offset+4);
1006 break;
1007 case Op_VecX:
1008 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1009 "movdqu xmm0, [rsp + #%d]\n\t"
1010 "movdqu [rsp + #%d], xmm0\n\t"
1011 "movdqu xmm0, [rsp - #16]",
1012 src_offset, dst_offset);
1013 break;
1014 case Op_VecY:
1015 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1016 "vmovdqu xmm0, [rsp + #%d]\n\t"
1017 "vmovdqu [rsp + #%d], xmm0\n\t"
1018 "vmovdqu xmm0, [rsp - #32]",
1019 src_offset, dst_offset);
1020 break;
1021 case Op_VecZ:
1022 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1023 "vmovdqu xmm0, [rsp + #%d]\n\t"
1024 "vmovdqu [rsp + #%d], xmm0\n\t"
1025 "vmovdqu xmm0, [rsp - #64]",
1026 src_offset, dst_offset);
1027 break;
1028 default:
1029 ShouldNotReachHere();
1030 }
1031 #endif
1032 }
1033 }
1034
// Emit (cbuf != NULL), print (cbuf == NULL && !do_size) or size a
// register-allocator spill copy between any pair of: integer registers,
// x87 float registers, XMM registers, AVX-512 opmask (k) registers and
// stack slots.  Returns a running byte count of the code emitted; the
// vector and opmask paths return 0 (those nodes use the generic
// MachNode::size() — see MachSpillCopyNode::size below).
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size; // Self copy, no move

  // Vector spill copies (but not vector masks) are handled entirely by the
  // vec_* helpers and sized elsewhere, hence the early "return 0".
  if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      // Move the high word first so the low-word POP below does not clobber it.
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( src_first_rc == rc_stack && dst_first_rc == rc_int )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
     } else {
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
     }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD );             // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    // Spill the x87 value into the freshly carved-out 8-byte scratch area...
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // AVX-512 opmask specific spilling.  These paths return 0: the opmask
  // copies are sized generically, not via the running 'size' count.
  if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    int offset = ra_->reg2offset(src_first);
    if (cbuf != nullptr) {
      MacroAssembler _masm(cbuf);
      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
    } else {
      st->print("KMOV %s, [ESP + %d]", Matcher::regName[dst_first], offset);
#endif
    }
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    int offset = ra_->reg2offset(dst_first);
    if (cbuf != nullptr) {
      MacroAssembler _masm(cbuf);
      __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else {
      st->print("KMOV [ESP + %d], %s", offset, Matcher::regName[src_first]);
#endif
    }
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    if (cbuf != nullptr) {
      MacroAssembler _masm(cbuf);
      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else {
      st->print("KMOV %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
#endif
    }
    return 0;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size; // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  Unimplemented();
  return 0; // Mute compiler
}
1320
#ifndef PRODUCT
// Debug output: run implementation() in print mode (no code buffer).
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

// Emit the spill copy into the code buffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

// Defer to the generic MachNode size computation.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1334
1335
1336 //=============================================================================
#ifndef PRODUCT
// Debug output: a BoxLock materializes the address of its stack lock slot.
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif
1344
// Emit LEA reg,[ESP+offset] to compute the address of the on-stack lock
// slot.  Uses an 8-bit displacement when offset < 128, else a 32-bit one
// (stack offsets here are presumably non-negative, so no < -128 check).
// Must agree byte-for-byte with BoxLockNode::size() below.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset], 32-bit displacement
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset], 8-bit displacement
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}
1361
1362 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1363 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1364 if( offset >= 128 ) {
1365 return 7;
1366 }
1367 else {
1368 return 4;
1369 }
1370 }
1371
1372 //=============================================================================
#ifndef PRODUCT
// Debug output for the Unverified Entry Point: inline-cache klass check,
// jump to the miss stub, then alignment NOPs (see MachUEPNode::emit).
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif
1383
// Unverified Entry Point: compare the expected klass (in EAX) against the
// receiver's klass (loaded via ECX) and jump to the inline-cache miss stub
// on mismatch.  The trailing NOPs are mandatory padding, not slack.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  // Emitted length must agree with MachUEPNode::size() below.
  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}
1401
1402 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1403 return OptoBreakpoint ? 11 : 12;
1404 }
1405
1406
1407 //=============================================================================
1408
// Vector calling convention not supported on x86_32.
const bool Matcher::supports_vector_calling_convention() {
  return false;
}

// Must never be called here, since the vector calling convention is not
// supported (see above).
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  Unimplemented();
  return OptoRegPair(0, 0);
}
1418
1419 // Is this branch offset short enough that a short branch can be used?
1420 //
1421 // NOTE: If the platform does not provide any short branch variants, then
1422 // this method should return false for offset 0.
1423 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1424 // The passed offset is relative to address of the branch.
1425 // On 86 a branch displacement is calculated relative to address
1426 // of a next instruction.
1427 offset -= br_size;
1428
1429 // the short version of jmpConUCF2 contains multiple branches,
1430 // making the reach slightly less
1431 if (rule == jmpConUCF2_rule)
1432 return (-126 <= offset && offset <= 125);
1433 return (-128 <= offset && offset <= 127);
1434 }
1435
1436 // Return whether or not this register is ever used as an argument. This
1437 // function is used on startup to build the trampoline stubs in generateOptoStub.
1438 // Registers not mentioned will be killed by the VM call in the trampoline, and
1439 // arguments in those registers not be available to the callee.
1440 bool Matcher::can_be_java_arg( int reg ) {
1441 if( reg == ECX_num || reg == EDX_num ) return true;
1442 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
1443 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1444 return false;
1445 }
1446
// A register is spillable as an argument exactly when it can carry a
// Java argument (delegates to can_be_java_arg).
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}
1450
1451 uint Matcher::int_pressure_limit()
1452 {
1453 return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
1454 }
1455
1456 uint Matcher::float_pressure_limit()
1457 {
1458 return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
1459 }
1460
1461 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1462 // Use hardware integer DIV instruction when
1463 // it is faster than a code which use multiply.
1464 // Only when constant divisor fits into 32 bit
1465 // (min_jint is excluded to get only correct
1466 // positive 32 bit values from negative).
1467 return VM_Version::has_fast_idiv() &&
1468 (divisor == (int)divisor && divisor != min_jint);
1469 }
1470
// Register for DIVI projection of divmodI: IDIV leaves the quotient in EAX.
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI: IDIV leaves the remainder in EDX.
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL.
// Must never be requested on this platform.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
// Must never be requested on this platform.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// No register needs to be saved across a method-handle invoke here.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}
1496
1497 // Returns true if the high 32 bits of the value is known to be zero.
1498 bool is_operand_hi32_zero(Node* n) {
1499 int opc = n->Opcode();
1500 if (opc == Op_AndL) {
1501 Node* o2 = n->in(2);
1502 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1503 return true;
1504 }
1505 }
1506 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1507 return true;
1508 }
1509 return false;
1510 }
1511
1512 %}
1513
1514 //----------ENCODING BLOCK-----------------------------------------------------
1515 // This block specifies the encoding classes used by the compiler to output
1516 // byte streams. Encoding classes generate functions which are called by
1517 // Machine Instruction Nodes in order to generate the bit encoding of the
1518 // instruction. Operands specify their base encoding interface with the
// interface keyword. Four interfaces are currently supported:
1520 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
1521 // operand to generate a function which returns its register number when
1522 // queried. CONST_INTER causes an operand to generate a function which
1523 // returns the value of the constant when queried. MEMORY_INTER causes an
1524 // operand to generate four functions which return the Base Register, the
1525 // Index Register, the Scale Value, and the Offset Value of the operand when
1526 // queried. COND_INTER causes an operand to generate six functions which
1527 // return the encoding code (ie - encoding bits for the instruction)
1528 // associated with each basic boolean condition for a conditional instruction.
1529 // Instructions specify two basic values for encoding. They use the
1530 // ins_encode keyword to specify their encoding class (which must be one of
1531 // the class names specified in the encoding block), and they use the
1532 // opcode keyword to specify, in order, their primary, secondary, and
1533 // tertiary opcode. Only the opcode sections which a particular instruction
1534 // needs for encoding need to be specified.
1535 encode %{
1536 // Build emit functions for each basic byte or larger field in the intel
1537 // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1538 // code in the enc_class source block. Emit functions will live in the
1539 // main source block for now. In future, we can generalize this by
1540 // adding a syntax that specifies the sizes of fields in an order,
1541 // so that the adlc can build the emit functions automagically
1542
  // Emit the instruction's primary opcode byte
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit the instruction's secondary opcode byte
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit an explicitly supplied opcode byte directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}
1561
  // Emit a register-direct ModRM byte (mod=11) for dst,src
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Emit an explicit opcode byte followed by a register-direct ModRM byte
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Zero a register with MOV r32,0 (preserves flags, unlike XOR)
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32 ,imm32
    emit_d32   ( cbuf, 0x0  );             // imm32==0x0
  %}
1575
  // Sign-extend EAX into EDX and guard the min_int / -1 special case
  // before an idiv (which the user of this rule emits afterwards).
  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg: divisor                           -1
    //
    // output: rax,: quotient  (= rax, idiv reg)       min_int
    //         rdx: remainder (= rax, irem reg)       0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops: opcode byte carries the register
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}
1621
1622
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  // As OpcSE, but also emits the r/m byte with the secondary opcode in
  // the reg field (register-direct form).
  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  // Emit the immediate itself: 1 byte when it fits in 8 bits, else 4 bytes.
  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}
1656
1657   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{  // ALU op on the low 32-bit half of a long immediate
1658     // Emit primary opcode and set sign-extend bit
1659     // Check for 8-bit immediate, and set sign extend bit in opcode
1660     int con = (int)$imm$$constant; // Throw away top bits
1661     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1662     // Emit r/m byte with secondary opcode, after primary opcode.
1663     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1664     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);  // imm8 when it fits, else imm32
1665     else emit_d32(cbuf,con);
1666   %}
1667
1668   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{  // companion op on the high 32-bit half (uses $tertiary)
1669     // Emit primary opcode and set sign-extend bit
1670     // Check for 8-bit immediate, and set sign extend bit in opcode
1671     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1672     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1673     // Emit r/m byte with tertiary opcode, after primary opcode.
1674     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
1675     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);  // imm8 when it fits, else imm32
1676     else emit_d32(cbuf,con);
1677   %}
1678
1679   enc_class OpcSReg (rRegI dst) %{ // BSWAP: emit $secondary with the register encoded into the opcode byte
1680     emit_cc(cbuf, $secondary, $dst$$reg );
1681   %}
1682
1683   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP of a 64-bit value held in a register pair
1684     int destlo = $dst$$reg;
1685     int desthi = HIGH_FROM_LOW_ENC(destlo);
1686     // bswap lo
1687     emit_opcode(cbuf, 0x0F);
1688     emit_cc(cbuf, 0xC8, destlo);
1689     // bswap hi
1690     emit_opcode(cbuf, 0x0F);
1691     emit_cc(cbuf, 0xC8, desthi);
1692     // xchg lo and hi (completes the 64-bit byte reversal)
1693     emit_opcode(cbuf, 0x87);
1694     emit_rm(cbuf, 0x3, destlo, desthi);
1695   %}
1696
1697   enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ...: ModRM with $secondary as the /digit
1698     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1699   %}
1700
1701   enc_class enc_cmov(cmpOp cop ) %{ // CMOV: condition code folded into the second opcode byte
1702     $$$emit8$primary;
1703     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1704   %}
1705
1706   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV of an FPU register
1707     // Build the two-byte FPU conditional-move opcode, then emit high byte first
1708     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1709     emit_d8(cbuf, op >> 8 );
1710     emit_d8(cbuf, op & 255);
1711   %}
1712
1713   // emulate a CMOV with a conditional branch around a MOV
1714   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1715     // Invert sense of branch from sense of CMOV (0x70 = Jcc short, ^1 flips the condition)
1716     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1717     emit_d8( cbuf, $brOffs$$constant );
1718   %}
1719
1720   enc_class enc_PartialSubtypeCheck( ) %{
1721     Register Redi = as_Register(EDI_enc); // result register
1721     Register Reax = as_Register(EAX_enc); // super class
1722     Register Recx = as_Register(ECX_enc); // killed
1723     Register Resi = as_Register(ESI_enc); // sub class
1724     Label miss;
1725
1726     MacroAssembler _masm(&cbuf);
1727     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1728                                      NULL, &miss,
1729                                      /*set_cond_codes:*/ true);
1730     if ($primary) {
1731       __ xorptr(Redi, Redi);  // $primary set: clear the result register on the hit path
1732     }
1733     __ bind(miss);
1734   %}
1735
1736   enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All
1737     MacroAssembler masm(&cbuf);
1738     int start = masm.offset();
1739     if (UseSSE >= 2) {
1740       if (VerifyFPU) {
1741         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1742       }
1743     } else {
1744       // External c_calling_convention expects the FPU stack to be 'clean'.
1745       // Compiled code leaves it dirty. Do cleanup now.
1746       masm.empty_FPU_stack();
1747     }
1748     // Record the emitted size the first time; every later use must match it
1749     if (sizeof_FFree_Float_Stack_All == -1) {
1750       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1751     } else {
1752       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1753     }
1754   %}
1755
1755   enc_class Verify_FPU_For_Leaf %{  // debug-only FPU-stack sanity check after a leaf runtime call
1756     if( VerifyFPU ) {
1757       MacroAssembler masm(&cbuf);
1758       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1759     }
1760   %}
1761
1762   enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1763     // This is the instruction starting address for relocation info.
1764     MacroAssembler _masm(&cbuf);
1765     cbuf.set_insts_mark();
1766     $$$emit8$primary;
1767     // CALL directly to the runtime; disp32 is pc-relative (target minus end of this instruction)
1768     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1769                    runtime_call_Relocation::spec(), RELOC_IMM32 );
1770     __ post_call_nop();
1771
1772     if (UseSSE >= 2) {
1773       MacroAssembler _masm(&cbuf);
1774       BasicType rt = tf()->return_type();
1775
1776       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1777         // A C runtime call where the return value is unused.  In SSE2+
1778         // mode the result needs to be removed from the FPU stack.  It's
1779         // likely that this function call could be removed by the
1780         // optimizer if the C function is a pure function.
1781         __ ffree(0);
1782       } else if (rt == T_FLOAT) {
1783         // Bounce the FPU-stack result through memory into xmm0
1784         __ lea(rsp, Address(rsp, -4));
1785         __ fstp_s(Address(rsp, 0));
1786         __ movflt(xmm0, Address(rsp, 0));
1787         __ lea(rsp, Address(rsp, 4));
1788       } else if (rt == T_DOUBLE) {
1789         // Same dance for a double result (8 bytes)
1790         __ lea(rsp, Address(rsp, -8));
1791         __ fstp_d(Address(rsp, 0));
1792         __ movdbl(xmm0, Address(rsp, 0));
1793         __ lea(rsp, Address(rsp, 8));
1794       }
1795     }
1796   %}
1797
1796   enc_class pre_call_resets %{
1797     // If method sets FPU control word restore it here
1798     debug_only(int off0 = cbuf.insts_size());
1799     if (ra_->C->in_24_bit_fp_mode()) {
1800       MacroAssembler _masm(&cbuf);
1801       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
1802     }
1803     // Clear upper bits of YMM registers when current compiled code uses
1804     // wide vectors to avoid AVX <-> SSE transition penalty during call.
1805     MacroAssembler _masm(&cbuf);
1806     __ vzeroupper();
1807     debug_only(int off1 = cbuf.insts_size());
1808     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1809   %}
1810
1811   enc_class post_call_FPU %{
1812     // If method sets FPU control word do it here also
1813     if (Compile::current()->in_24_bit_fp_mode()) {
1814       MacroAssembler masm(&cbuf);
1815       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
1816     }
1817   %}
1818
1819   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1820     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1821     // who we intended to call.
1822     MacroAssembler _masm(&cbuf);
1823     cbuf.set_insts_mark();
1824     $$$emit8$primary;
1825
1826     if (!_method) {
1827       // Runtime stub target: pc-relative disp32 (target minus end of instruction)
1827       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1828                      runtime_call_Relocation::spec(),
1829                      RELOC_IMM32);
1830       __ post_call_nop();
1831     } else {
1832       int method_index = resolved_method_index(cbuf);
1833       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1834                                                   : static_call_Relocation::spec(method_index);
1835       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1836                      rspec, RELOC_DISP32);
1837       __ post_call_nop();
1838       address mark = cbuf.insts_mark();
1839       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
1840         // Calls of the same statically bound method can share
1841         // a stub to the interpreter.
1842         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
1843       } else {
1844         // Emit stubs for static call.
1845         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
1846         if (stub == NULL) {
1847           ciEnv::current()->record_failure("CodeCache is full");
1848           return;
1849         }
1850       }
1851     }
1852   %}
1853
1854   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1855     MacroAssembler _masm(&cbuf);
1856     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1857     __ post_call_nop();
1858   %}
1859
1860   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1861     int disp = in_bytes(Method::from_compiled_offset());
1862     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1863
1864     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1865     MacroAssembler _masm(&cbuf);
1866     cbuf.set_insts_mark();
1867     $$$emit8$primary;
1868     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte: mod=01 means disp8 follows
1869     emit_d8(cbuf, disp);             // Displacement
1870     __ post_call_nop();
1871   %}
1872
1873 // Following encoding is no longer used, but may be restored if calling
1874 // convention changes significantly.
1875 // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1876 //
1877 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL
1878 // // int ic_reg = Matcher::inline_cache_reg();
1879 // // int ic_encode = Matcher::_regEncode[ic_reg];
1880 // // int imo_reg = Matcher::interpreter_method_reg();
1881 // // int imo_encode = Matcher::_regEncode[imo_reg];
1882 //
1883 // // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
1884 // // // so we load it immediately before the call
1885 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_ptr
1886 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1887 //
1888 // // xor rbp,ebp
1889 // emit_opcode(cbuf, 0x33);
1890 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1891 //
1892 // // CALL to interpreter.
1893 // cbuf.set_insts_mark();
1894 // $$$emit8$primary;
1895 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1896 // runtime_call_Relocation::spec(), RELOC_IMM32 );
1897 // %}
1898
1899   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR: opcode, ModRM /digit, imm8 count
1900     $$$emit8$primary;
1901     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1902     $$$emit8$shift$$constant;
1903   %}
1904
1905   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate (B8+reg imm32)
1906     // Load immediate does not have a zero or sign extended version
1907     // for 8-bit immediates
1908     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1909     $$$emit32$src$$constant;
1910   %}
1911
1912   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate (opcode from $primary)
1913     // Load immediate does not have a zero or sign extended version
1914     // for 8-bit immediates
1915     emit_opcode(cbuf, $primary + $dst$$reg);
1916     $$$emit32$src$$constant;
1917   %}
1918
1919   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load low half of a long Immediate
1920     // Load immediate does not have a zero or sign extended version
1921     // for 8-bit immediates
1922     int dst_enc = $dst$$reg;
1923     int src_con = $src$$constant & 0x0FFFFFFFFL;  // low 32 bits of the long constant
1924     if (src_con == 0) {
1925       // xor dst, dst (shorter than a mov of zero)
1926       emit_opcode(cbuf, 0x33);
1927       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1928     } else {
1929       emit_opcode(cbuf, $primary + dst_enc);
1930       emit_d32(cbuf, src_con);
1931     }
1932   %}
1933
1934   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load high half of a long Immediate
1935     // Load immediate does not have a zero or sign extended version
1936     // for 8-bit immediates
1937     int dst_enc = $dst$$reg + 2;  // encoding of the pair's high register
1938     int src_con = ((julong)($src$$constant)) >> 32;  // high 32 bits of the long constant
1939     if (src_con == 0) {
1940       // xor dst, dst (shorter than a mov of zero)
1941       emit_opcode(cbuf, 0x33);
1942       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1943     } else {
1944       emit_opcode(cbuf, $primary + dst_enc);
1945       emit_d32(cbuf, src_con);
1946     }
1947   %}
1948
1949
1950   // Encode a reg-reg copy.  If it is useless, then empty encoding.
1951   enc_class enc_Copy( rRegI dst, rRegI src ) %{
1952     encode_Copy( cbuf, $dst$$reg, $src$$reg );
1953   %}
1954
1955   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{  // copy only the low word of a long pair
1956     encode_Copy( cbuf, $dst$$reg, $src$$reg );
1957   %}
1958
1959   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many): bare ModRM, opcode emitted elsewhere
1960     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1961   %}
1962
1963   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many): low words, $primary opcode
1964     $$$emit8$primary;
1965     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1966   %}
1967
1968   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many): high words, $secondary opcode
1969     $$$emit8$secondary;
1970     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
1971   %}
1972
1973   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many): low words, ModRM only
1974     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1975   %}
1976
1977   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many): high words, ModRM only
1978     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
1979   %}
1980
1981   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{  // int dst vs. high word of long src
1982     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
1983   %}
1984
1985   enc_class Con32 (immI src) %{    // Con32(storeImmI)
1986     // Output immediate
1987     $$$emit32$src$$constant;
1988   %}
1989
1990   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
1991     // Output Float immediate bits (reinterpreted as a 32-bit integer)
1992     jfloat jf = $src$$constant;
1993     int    jf_as_bits = jint_cast( jf );
1994     emit_d32(cbuf, jf_as_bits);
1995   %}
1996
1997   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
1998     // Output Float immediate bits (reinterpreted as a 32-bit integer)
1999     jfloat jf = $src$$constant;
2000     int    jf_as_bits = jint_cast( jf );
2001     emit_d32(cbuf, jf_as_bits);
2002   %}
2003
2004   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2005     // Output immediate
2006     $$$emit16$src$$constant;
2007   %}
2008
2009   enc_class Con_d32(immI src) %{  // raw 32-bit immediate, no substitution macro
2010     emit_d32(cbuf,$src$$constant);
2011   %}
2012
2013   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2014     // Output immediate memory reference: mod=00, r/m=101 means [disp32]
2015     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2016     emit_d32(cbuf, 0x00);
2017   %}
2018
2019   enc_class lock_prefix( ) %{
2020     emit_opcode(cbuf,0xF0);         // [Lock]
2021   %}
2022
2023   // Cmp-xchg long value.
2024   // Note: we need to swap rbx, and rcx before and after the
2025   //       cmpxchg8 instruction because the instruction uses
2026   //       rcx as the high order word of the new value to store but
2027   //       our register encoding uses rbx,.
2028   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2029
2030     //XCHG  rbx,ecx
2031     emit_opcode(cbuf,0x87);
2032     emit_opcode(cbuf,0xD9);
2033     // [Lock]
2034     emit_opcode(cbuf,0xF0);
2035     // CMPXCHG8 [Eptr]
2036     emit_opcode(cbuf,0x0F);
2037     emit_opcode(cbuf,0xC7);
2038     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2039     //XCHG  rbx,ecx  (restore the swapped pair)
2040     emit_opcode(cbuf,0x87);
2041     emit_opcode(cbuf,0xD9);
2042   %}
2043
2044   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{  // locked 32-bit compare-and-exchange
2045     // [Lock]
2046     emit_opcode(cbuf,0xF0);
2047
2048     // CMPXCHG [Eptr]
2049     emit_opcode(cbuf,0x0F);
2050     emit_opcode(cbuf,0xB1);
2051     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2052   %}
2053
2054   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{  // locked 8-bit compare-and-exchange
2055     // [Lock]
2056     emit_opcode(cbuf,0xF0);
2057
2058     // CMPXCHGB [Eptr]
2059     emit_opcode(cbuf,0x0F);
2060     emit_opcode(cbuf,0xB0);
2061     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2062   %}
2063
2064   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{  // locked 16-bit compare-and-exchange
2065     // [Lock]
2066     emit_opcode(cbuf,0xF0);
2067
2068     // 16-bit mode (operand-size override prefix)
2069     emit_opcode(cbuf, 0x66);
2070
2071     // CMPXCHGW [Eptr]
2072     emit_opcode(cbuf,0x0F);
2073     emit_opcode(cbuf,0xB1);
2074     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2075   %}
2076
2077   enc_class enc_flags_ne_to_boolean( iRegI res ) %{  // materialize ZF into a 0/1 result
2078     int res_encoding = $res$$reg;
2079
2080     // MOV  res,0 (does not disturb flags)
2081     emit_opcode( cbuf, 0xB8 + res_encoding);
2082     emit_d32( cbuf, 0 );
2083     // JNE,s  fail (skips the 5-byte MOV below)
2084     emit_opcode(cbuf,0x75);
2085     emit_d8(cbuf, 5 );
2086     // MOV  res,1
2087     emit_opcode( cbuf, 0xB8 + res_encoding);
2088     emit_d32( cbuf, 1 );
2089     // fail:
2090   %}
2091
2092   enc_class set_instruction_start( ) %{
2093     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2094   %}
2095
2096   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2097     int reg_encoding = $ereg$$reg;
2098     int base  = $mem$$base;
2099     int index = $mem$$index;
2100     int scale = $mem$$scale;
2101     int displace = $mem$$disp;
2102     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2103     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2104   %}
2105
2106   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2107     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
2108     int base  = $mem$$base;
2109     int index = $mem$$index;
2110     int scale = $mem$$scale;
2111     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2112     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2113     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2114   %}
2115
2116   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{  // long shift by 1..31 using SHLD/SHRD
2117     int r1, r2;
2118     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }  // 0xA4 = SHLD
2119     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
2120     emit_opcode(cbuf,0x0F);
2121     emit_opcode(cbuf,$tertiary);
2122     emit_rm(cbuf, 0x3, r1, r2);
2123     emit_d8(cbuf,$cnt$$constant);
2124     emit_d8(cbuf,$primary);
2125     emit_rm(cbuf, 0x3, $secondary, r1);
2126     emit_d8(cbuf,$cnt$$constant);
2127   %}
2128
2129   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{  // arithmetic long shift by 32..63
2130     emit_opcode( cbuf, 0x8B ); // Move
2131     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
2132     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2133       emit_d8(cbuf,$primary);
2134       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2135       emit_d8(cbuf,$cnt$$constant-32);
2136     }
2137     emit_d8(cbuf,$primary);  // sign-fill the high word: shift it by 31
2138     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
2139     emit_d8(cbuf,31);
2140   %}
2141
2142   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{  // logical long shift by 32..63
2143     int r1, r2;
2144     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
2145     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
2146
2147     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2148     emit_rm(cbuf, 0x3, r1, r2);
2149     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2150       emit_opcode(cbuf,$primary);
2151       emit_rm(cbuf, 0x3, $secondary, r1);
2152       emit_d8(cbuf,$cnt$$constant-32);
2153     }
2154     emit_opcode(cbuf,0x33);  // XOR r2,r2 (zero-fill the vacated word)
2155     emit_rm(cbuf, 0x3, r2, r2);
2156   %}
2157
2158   // Clone of RegMem but accepts an extra parameter to access each
2159   // half of a double in memory; it never needs relocation info.
2160   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2161     emit_opcode(cbuf,$opcode$$constant);
2162     int reg_encoding = $rm_reg$$reg;
2163     int base     = $mem$$base;
2164     int index    = $mem$$index;
2165     int scale    = $mem$$scale;
2166     int displace = $mem$$disp + $disp_for_half$$constant;  // 0 or 4 selects the half
2167     relocInfo::relocType disp_reloc = relocInfo::none;
2168     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2169   %}
2170
2171   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2172   //
2173   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2174   // and it never needs relocation information.
2175   // Frequently used to move data between FPU's Stack Top and memory.
2176   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2177     int rm_byte_opcode = $rm_opcode$$constant;
2178     int base     = $mem$$base;
2179     int index    = $mem$$index;
2180     int scale    = $mem$$scale;
2181     int displace = $mem$$disp;
2182     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2183     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2184   %}
2185
2186   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{  // like RMopc_Mem_no_oop but allows oop displacements
2187     int rm_byte_opcode = $rm_opcode$$constant;
2188     int base     = $mem$$base;
2189     int index    = $mem$$index;
2190     int scale    = $mem$$scale;
2191     int displace = $mem$$disp;
2192     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2193     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2194   %}
2195
2196   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2197     int reg_encoding = $dst$$reg;
2198     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2199     int index        = 0x04;            // 0x04 indicates no index
2200     int scale        = 0x00;            // 0x00 indicates no scale
2201     int displace     = $src1$$constant; // 0x00 indicates no displacement
2202     relocInfo::relocType disp_reloc = relocInfo::none;
2203     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2204   %}
2205
2206   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2207     // Compare dst,src
2208     emit_opcode(cbuf,0x3B);
2209     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2210     // jmp dst < src around move (JL short over the 2-byte MOV)
2211     emit_opcode(cbuf,0x7C);
2212     emit_d8(cbuf,2);
2213     // move dst,src
2214     emit_opcode(cbuf,0x8B);
2215     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2216   %}
2217
2218   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2219     // Compare dst,src
2220     emit_opcode(cbuf,0x3B);
2221     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2222     // jmp dst > src around move (JG short over the 2-byte MOV)
2223     emit_opcode(cbuf,0x7F);
2224     emit_d8(cbuf,2);
2225     // move dst,src
2226     emit_opcode(cbuf,0x8B);
2227     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2228   %}
2229
2230   enc_class enc_FPR_store(memory mem, regDPR src) %{
2231     // If src is FPR1, we can just FST to store it.
2232     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2233     int reg_encoding = 0x2; // Just store
2234     int base  = $mem$$base;
2235     int index = $mem$$index;
2236     int scale = $mem$$scale;
2237     int displace = $mem$$disp;
2238     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2239     if( $src$$reg != FPR1L_enc ) {
2240       reg_encoding = 0x3;  // Store & pop
2241       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2242       emit_d8( cbuf, 0xC0-1+$src$$reg );
2243     }
2244     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2245     emit_opcode(cbuf,$primary);
2246     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2247   %}
2248
2249   enc_class neg_reg(rRegI dst) %{
2250     // NEG $dst (F7 /3)
2251     emit_opcode(cbuf,0xF7);
2252     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2253   %}
2254
2255   enc_class setLT_reg(eCXRegI dst) %{
2256     // SETLT $dst (0F 9C /r, byte register)
2257     emit_opcode(cbuf,0x0F);
2258     emit_opcode(cbuf,0x9C);
2259     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2260   %}
2261
2262   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2263     int tmpReg = $tmp$$reg;
2264
2265     // SUB $p,$q
2266     emit_opcode(cbuf,0x2B);
2267     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2268     // SBB $tmp,$tmp  (all ones if borrow, else zero)
2269     emit_opcode(cbuf,0x1B);
2270     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2271     // AND $tmp,$y
2272     emit_opcode(cbuf,0x23);
2273     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2274     // ADD $p,$tmp
2275     emit_opcode(cbuf,0x03);
2276     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2277   %}
2278
2279   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{  // variable 64-bit left shift, count in ECX
2280     // TEST shift,32
2281     emit_opcode(cbuf,0xF7);
2282     emit_rm(cbuf, 0x3, 0, ECX_enc);
2283     emit_d32(cbuf,0x20);
2284     // JEQ,s small
2285     emit_opcode(cbuf, 0x74);
2286     emit_d8(cbuf, 0x04);
2287     // MOV $dst.hi,$dst.lo
2288     emit_opcode( cbuf, 0x8B );
2289     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
2290     // CLR $dst.lo
2291     emit_opcode(cbuf, 0x33);
2292     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2293     // small:
2294     // SHLD $dst.hi,$dst.lo,$shift
2295     emit_opcode(cbuf,0x0F);
2296     emit_opcode(cbuf,0xA5);
2297     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
2298     // SHL $dst.lo,$shift"
2299     emit_opcode(cbuf,0xD3);
2300     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2301   %}
2302
2303   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{  // variable 64-bit logical right shift, count in ECX
2304     // TEST shift,32
2305     emit_opcode(cbuf,0xF7);
2306     emit_rm(cbuf, 0x3, 0, ECX_enc);
2307     emit_d32(cbuf,0x20);
2308     // JEQ,s small
2309     emit_opcode(cbuf, 0x74);
2310     emit_d8(cbuf, 0x04);
2311     // MOV $dst.lo,$dst.hi
2312     emit_opcode( cbuf, 0x8B );
2313     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
2314     // CLR $dst.hi
2315     emit_opcode(cbuf, 0x33);
2316     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
2317     // small:
2318     // SHRD $dst.lo,$dst.hi,$shift
2319     emit_opcode(cbuf,0x0F);
2320     emit_opcode(cbuf,0xAD);
2321     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
2322     // SHR $dst.hi,$shift"
2323     emit_opcode(cbuf,0xD3);
2324     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
2325   %}
2326
2327   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{  // variable 64-bit arithmetic right shift, count in ECX
2328     // TEST shift,32
2329     emit_opcode(cbuf,0xF7);
2330     emit_rm(cbuf, 0x3, 0, ECX_enc);
2331     emit_d32(cbuf,0x20);
2332     // JEQ,s small
2333     emit_opcode(cbuf, 0x74);
2334     emit_d8(cbuf, 0x05);
2335     // MOV $dst.lo,$dst.hi
2336     emit_opcode( cbuf, 0x8B );
2337     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
2338     // SAR $dst.hi,31  (sign-fill the high word)
2339     emit_opcode(cbuf, 0xC1);
2340     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
2341     emit_d8(cbuf, 0x1F );
2342     // small:
2343     // SHRD $dst.lo,$dst.hi,$shift
2344     emit_opcode(cbuf,0x0F);
2345     emit_opcode(cbuf,0xAD);
2346     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
2347     // SAR $dst.hi,$shift"
2348     emit_opcode(cbuf,0xD3);
2349     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
2350   %}
2351
2352
2353   // ----------------- Encodings for floating point unit -----------------
2354   // May leave result in FPU-TOS or FPU reg depending on opcodes
2355   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2356     $$$emit8$primary;
2357     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2358   %}
2359
2360   // Pop argument in FPR0 with FSTP ST(0)
2361   enc_class PopFPU() %{
2362     emit_opcode( cbuf, 0xDD );
2363     emit_d8( cbuf, 0xD8 );
2364   %}
2365
2366   // !!!!! equivalent to Pop_Reg_F
2367   enc_class Pop_Reg_DPR( regDPR dst ) %{
2368     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2369     emit_d8( cbuf, 0xD8+$dst$$reg );
2370   %}
2371
2372   enc_class Push_Reg_DPR( regDPR dst ) %{
2373     emit_opcode( cbuf, 0xD9 );
2374     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2375   %}
2376
2377   enc_class strictfp_bias1( regDPR dst ) %{  // strictfp: scale down by the bias-1 constant
2378     emit_opcode( cbuf, 0xDB );           // FLD m80real
2379     emit_opcode( cbuf, 0x2D );
2380     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
2381     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2382     emit_opcode( cbuf, 0xC8+$dst$$reg );
2383   %}
2384
2385   enc_class strictfp_bias2( regDPR dst ) %{  // strictfp: scale back up by the bias-2 constant
2386     emit_opcode( cbuf, 0xDB );           // FLD m80real
2387     emit_opcode( cbuf, 0x2D );
2388     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
2389     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2390     emit_opcode( cbuf, 0xC8+$dst$$reg );
2391   %}
2392
2393   // Special case for moving an integer register to a stack slot.
2394   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2395     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2396   %}
2397
2398   // Special case for moving a register to a stack slot.
2399   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2400     // Opcode already emitted
2401     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2402     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
2403     emit_d32(cbuf, $dst$$disp);   // Displacement
2404   %}
2405
2406   // Push the integer in stackSlot 'src' onto FP-stack
2407   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2408     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2409   %}
2410
2411   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2412   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2413     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2414   %}
2415
2416   // Same as Pop_Mem_F except for opcode
2417   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2418   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2419     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2420   %}
2421
2422   enc_class Pop_Reg_FPR( regFPR dst ) %{
2423     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2424     emit_d8( cbuf, 0xD8+$dst$$reg );
2425   %}
2426
2427   enc_class Push_Reg_FPR( regFPR dst ) %{
2428     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2429     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2430   %}
2431
2432   // Push FPU's float to a stack-slot, and pop FPU-stack
2433   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2434     int pop = 0x02;  // plain store when src is already TOS
2435     if ($src$$reg != FPR1L_enc) {
2436       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2437       emit_d8( cbuf, 0xC0-1+$src$$reg );
2438       pop = 0x03;  // we pushed a copy, so store-and-pop
2439     }
2440     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2441   %}
2442
2443   // Push FPU's double to a stack-slot, and pop FPU-stack
2444   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2445     int pop = 0x02;  // plain store when src is already TOS
2446     if ($src$$reg != FPR1L_enc) {
2447       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2448       emit_d8( cbuf, 0xC0-1+$src$$reg );
2449       pop = 0x03;  // we pushed a copy, so store-and-pop
2450     }
2451     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2452   %}
2453
2454   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2455   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2456     int pop = 0xD0 - 1; // -1 since we skip FLD
2457     if ($src$$reg != FPR1L_enc) {
2458       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2459       emit_d8( cbuf, 0xC0-1+$src$$reg );
2460       pop = 0xD8;  // FSTP form, to pop the copy we pushed
2461     }
2462     emit_opcode( cbuf, 0xDD );
2463     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2464   %}
2465
2466
2467   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2468     // load dst in FPR0
2469     emit_opcode( cbuf, 0xD9 );
2470     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2471     if ($src$$reg != FPR1L_enc) {
2472       // fincstp
2473       emit_opcode (cbuf, 0xD9);
2474       emit_opcode (cbuf, 0xF7);
2475       // swap src with FPR1:
2476       // FXCH FPR1 with src
2477       emit_opcode(cbuf, 0xD9);
2478       emit_d8(cbuf, 0xC8-1+$src$$reg );
2479       // fdecstp
2480       emit_opcode (cbuf, 0xD9);
2481       emit_opcode (cbuf, 0xF6);
2482     }
2483   %}
2484
2485   enc_class Push_ModD_encoding(regD src0, regD src1) %{  // push two XMM doubles onto the FPU stack via memory
2486     MacroAssembler _masm(&cbuf);
2487     __ subptr(rsp, 8);
2488     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2489     __ fld_d(Address(rsp, 0));
2490     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2491     __ fld_d(Address(rsp, 0));
2492   %}
2493
2494   enc_class Push_ModF_encoding(regF src0, regF src1) %{  // push two XMM floats onto the FPU stack via memory
2495     MacroAssembler _masm(&cbuf);
2496     __ subptr(rsp, 4);
2497     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2498     __ fld_s(Address(rsp, 0));
2499     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2500     __ fld_s(Address(rsp, 0));
2501   %}
2502
2503   enc_class Push_ResultD(regD dst) %{  // move FPU TOS result into an XMM double, releasing the temp slot
2504     MacroAssembler _masm(&cbuf);
2505     __ fstp_d(Address(rsp, 0));
2506     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2507     __ addptr(rsp, 8);
2508   %}
2509
2510   enc_class Push_ResultF(regF dst, immI d8) %{  // move FPU TOS result into an XMM float, popping $d8 stack bytes
2511     MacroAssembler _masm(&cbuf);
2512     __ fstp_s(Address(rsp, 0));
2513     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2514     __ addptr(rsp, $d8$$constant);
2515   %}
2516
2517   enc_class Push_SrcD(regD src) %{  // push one XMM double onto the FPU stack via a fresh stack slot
2518     MacroAssembler _masm(&cbuf);
2519     __ subptr(rsp, 8);
2520     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2521     __ fld_d(Address(rsp, 0));
2522   %}
2523
2524   enc_class push_stack_temp_qword() %{  // reserve an 8-byte scratch slot on the stack
2525     MacroAssembler _masm(&cbuf);
2526     __ subptr(rsp, 8);
2527   %}
2528
2529   enc_class pop_stack_temp_qword() %{  // release the 8-byte scratch slot
2530     MacroAssembler _masm(&cbuf);
2531     __ addptr(rsp, 8);
2532   %}
2533
2534   enc_class push_xmm_to_fpr1(regD src) %{  // assumes a scratch slot already exists at [rsp]
2535     MacroAssembler _masm(&cbuf);
2536     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2537     __ fld_d(Address(rsp, 0));
2538   %}
2539
2540   enc_class Push_Result_Mod_DPR( regDPR src) %{
2541     if ($src$$reg != FPR1L_enc) {
2542       // fincstp
2543       emit_opcode (cbuf, 0xD9);
2544       emit_opcode (cbuf, 0xF7);
2545       // FXCH FPR1 with src
2546       emit_opcode(cbuf, 0xD9);
2547       emit_d8(cbuf, 0xC8-1+$src$$reg );
2548       // fdecstp
2549       emit_opcode (cbuf, 0xD9);
2550       emit_opcode (cbuf, 0xF6);
2551     }
2552     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2553     // // FSTP   FPR$dst$$reg
2554     // emit_opcode( cbuf, 0xDD );
2555     // emit_d8( cbuf, 0xD8+$dst$$reg );
2556   %}
2557
2558   enc_class fnstsw_sahf_skip_parity() %{  // copy FPU status to EFLAGS, branch past 5 bytes if ordered
2559     // fnstsw ax
2560     emit_opcode( cbuf, 0xDF );
2561     emit_opcode( cbuf, 0xE0 );
2562     // sahf
2563     emit_opcode( cbuf, 0x9E );
2564     // jnp  ::skip
2565     emit_opcode( cbuf, 0x7B );
2566     emit_opcode( cbuf, 0x05 );
2567   %}
2568
2569   enc_class emitModDPR() %{
2570     // fprem must be iterative
2571     // :: loop
2572     // fprem
2573     emit_opcode( cbuf, 0xD9 );
2574     emit_opcode( cbuf, 0xF8 );
2575     // wait
2576     emit_opcode( cbuf, 0x9b );
2577     // fnstsw ax
2578     emit_opcode( cbuf, 0xDF );
2579     emit_opcode( cbuf, 0xE0 );
2580     // sahf
2581     emit_opcode( cbuf, 0x9E );
2582     // jp ::loop  (C2 set means reduction incomplete; disp32 = -12 back to fprem)
2583     emit_opcode( cbuf, 0x0F );
2584     emit_opcode( cbuf, 0x8A );
2585     emit_opcode( cbuf, 0xF4 );
2586     emit_opcode( cbuf, 0xFF );
2587     emit_opcode( cbuf, 0xFF );
2588     emit_opcode( cbuf, 0xFF );
2589   %}
2590
2591   enc_class fpu_flags() %{  // turn FPU compare status into integer EFLAGS, forcing LT on unordered
2592     // fnstsw_ax
2593     emit_opcode( cbuf, 0xDF);
2594     emit_opcode( cbuf, 0xE0);
2595     // test ax,0x0400
2596     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2597     emit_opcode( cbuf, 0xA9 );
2598     emit_d16   ( cbuf, 0x0400 );
2599     // // // This sequence works, but stalls for 12-16 cycles on PPro
2600     // // test rax,0x0400
2601     // emit_opcode( cbuf, 0xA9 );
2602     // emit_d32   ( cbuf, 0x00000400 );
2603     //
2604     // jz exit (no unordered comparison)
2605     emit_opcode( cbuf, 0x74 );
2606     emit_d8    ( cbuf, 0x02 );
2607     // mov ah,1 - treat as LT case (set carry flag)
2608     emit_opcode( cbuf, 0xB4 );
2609     emit_d8    ( cbuf, 0x01 );
2610     // sahf
2611     emit_opcode( cbuf, 0x9E);
2612   %}
2613
2614   enc_class cmpF_P6_fixup() %{
2615     // Fixup the integer flags in case comparison involved a NaN
2616     //
2617     // JNP exit (no unordered comparison, P-flag is set by NaN)
2618     emit_opcode( cbuf, 0x7B );
2619     emit_d8    ( cbuf, 0x03 );
2620     // MOV AH,1 - treat as LT case (set carry flag)
2621     emit_opcode( cbuf, 0xB4 );
2622     emit_d8    ( cbuf, 0x01 );
2623     // SAHF
2624     emit_opcode( cbuf, 0x9E);
2625     // NOP       // target for branch to avoid branch to branch
2626     emit_opcode( cbuf, 0x90);
2627   %}
2628
2629 // fnstsw_ax();
2630 // sahf();
2631 // movl(dst, nan_result);
2632 // jcc(Assembler::parity, exit);
2633 // movl(dst, less_result);
2634 // jcc(Assembler::below, exit);
2635 // movl(dst, equal_result);
2636 // jcc(Assembler::equal, exit);
2637 // movl(dst, greater_result);
2638
2639 // less_result = 1;
2640 // greater_result = -1;
2641 // equal_result = 0;
2642 // nan_result = -1;
2643
2644 enc_class CmpF_Result(rRegI dst) %{
2645 // fnstsw_ax();
2646 emit_opcode( cbuf, 0xDF);
2647 emit_opcode( cbuf, 0xE0);
2648 // sahf
2649 emit_opcode( cbuf, 0x9E);
2650 // movl(dst, nan_result);
2651 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2652 emit_d32( cbuf, -1 );
2653 // jcc(Assembler::parity, exit);
2654 emit_opcode( cbuf, 0x7A );
2655 emit_d8 ( cbuf, 0x13 );
2656 // movl(dst, less_result);
2657 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2658 emit_d32( cbuf, -1 );
2659 // jcc(Assembler::below, exit);
2660 emit_opcode( cbuf, 0x72 );
2661 emit_d8 ( cbuf, 0x0C );
2662 // movl(dst, equal_result);
2663 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2664 emit_d32( cbuf, 0 );
2665 // jcc(Assembler::equal, exit);
2666 emit_opcode( cbuf, 0x74 );
2667 emit_d8 ( cbuf, 0x05 );
2668 // movl(dst, greater_result);
2669 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2670 emit_d32( cbuf, 1 );
2671 %}
2672
2673
2674 // Compare the longs and set flags
2675 // BROKEN! Do Not use as-is
2676 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2677 // CMP $src1.hi,$src2.hi
2678 emit_opcode( cbuf, 0x3B );
2679 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
2680 // JNE,s done
2681 emit_opcode(cbuf,0x75);
2682 emit_d8(cbuf, 2 );
2683 // CMP $src1.lo,$src2.lo
2684 emit_opcode( cbuf, 0x3B );
2685 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2686 // done:
2687 %}
2688
2689 enc_class convert_int_long( regL dst, rRegI src ) %{
2690 // mov $dst.lo,$src
2691 int dst_encoding = $dst$$reg;
2692 int src_encoding = $src$$reg;
2693 encode_Copy( cbuf, dst_encoding , src_encoding );
2694 // mov $dst.hi,$src
2695 encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
2696 // sar $dst.hi,31
2697 emit_opcode( cbuf, 0xC1 );
2698 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
2699 emit_d8(cbuf, 0x1F );
2700 %}
2701
2702 enc_class convert_long_double( eRegL src ) %{
2703 // push $src.hi
2704 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
2705 // push $src.lo
2706 emit_opcode(cbuf, 0x50+$src$$reg );
2707 // fild 64-bits at [SP]
2708 emit_opcode(cbuf,0xdf);
2709 emit_d8(cbuf, 0x6C);
2710 emit_d8(cbuf, 0x24);
2711 emit_d8(cbuf, 0x00);
2712 // pop stack
2713 emit_opcode(cbuf, 0x83); // add SP, #8
2714 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2715 emit_d8(cbuf, 0x8);
2716 %}
2717
2718 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2719 // IMUL EDX:EAX,$src1
2720 emit_opcode( cbuf, 0xF7 );
2721 emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2722 // SAR EDX,$cnt-32
2723 int shift_count = ((int)$cnt$$constant) - 32;
2724 if (shift_count > 0) {
2725 emit_opcode(cbuf, 0xC1);
2726 emit_rm(cbuf, 0x3, 7, $dst$$reg );
2727 emit_d8(cbuf, shift_count);
2728 }
2729 %}
2730
2731 // this version doesn't have add sp, 8
2732 enc_class convert_long_double2( eRegL src ) %{
2733 // push $src.hi
2734 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
2735 // push $src.lo
2736 emit_opcode(cbuf, 0x50+$src$$reg );
2737 // fild 64-bits at [SP]
2738 emit_opcode(cbuf,0xdf);
2739 emit_d8(cbuf, 0x6C);
2740 emit_d8(cbuf, 0x24);
2741 emit_d8(cbuf, 0x00);
2742 %}
2743
2744 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2745 // Basic idea: long = (long)int * (long)int
2746 // IMUL EDX:EAX, src
2747 emit_opcode( cbuf, 0xF7 );
2748 emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2749 %}
2750
2751 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2752 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
2753 // MUL EDX:EAX, src
2754 emit_opcode( cbuf, 0xF7 );
2755 emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2756 %}
2757
2758 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2759 // Basic idea: lo(result) = lo(x_lo * y_lo)
2760 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2761 // MOV $tmp,$src.lo
2762 encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2763 // IMUL $tmp,EDX
2764 emit_opcode( cbuf, 0x0F );
2765 emit_opcode( cbuf, 0xAF );
2766 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
2767 // MOV EDX,$src.hi
2768 encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
2769 // IMUL EDX,EAX
2770 emit_opcode( cbuf, 0x0F );
2771 emit_opcode( cbuf, 0xAF );
2772 emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
2773 // ADD $tmp,EDX
2774 emit_opcode( cbuf, 0x03 );
2775 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
2776 // MUL EDX:EAX,$src.lo
2777 emit_opcode( cbuf, 0xF7 );
2778 emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2779 // ADD EDX,ESI
2780 emit_opcode( cbuf, 0x03 );
2781 emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
2782 %}
2783
2784 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2785 // Basic idea: lo(result) = lo(src * y_lo)
2786 // hi(result) = hi(src * y_lo) + lo(src * y_hi)
2787 // IMUL $tmp,EDX,$src
2788 emit_opcode( cbuf, 0x6B );
2789 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
2790 emit_d8( cbuf, (int)$src$$constant );
2791 // MOV EDX,$src
2792 emit_opcode(cbuf, 0xB8 + EDX_enc);
2793 emit_d32( cbuf, (int)$src$$constant );
2794 // MUL EDX:EAX,EDX
2795 emit_opcode( cbuf, 0xF7 );
2796 emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2797 // ADD EDX,ESI
2798 emit_opcode( cbuf, 0x03 );
2799 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2800 %}
2801
2802 enc_class long_div( eRegL src1, eRegL src2 ) %{
2803 // PUSH src1.hi
2804 emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
2805 // PUSH src1.lo
2806 emit_opcode(cbuf, 0x50+$src1$$reg );
2807 // PUSH src2.hi
2808 emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
2809 // PUSH src2.lo
2810 emit_opcode(cbuf, 0x50+$src2$$reg );
2811 // CALL directly to the runtime
2812 MacroAssembler _masm(&cbuf);
2813 cbuf.set_insts_mark();
2814 emit_opcode(cbuf,0xE8); // Call into runtime
2815 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2816 __ post_call_nop();
2817 // Restore stack
2818 emit_opcode(cbuf, 0x83); // add SP, #framesize
2819 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2820 emit_d8(cbuf, 4*4);
2821 %}
2822
2823 enc_class long_mod( eRegL src1, eRegL src2 ) %{
2824 // PUSH src1.hi
2825 emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
2826 // PUSH src1.lo
2827 emit_opcode(cbuf, 0x50+$src1$$reg );
2828 // PUSH src2.hi
2829 emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
2830 // PUSH src2.lo
2831 emit_opcode(cbuf, 0x50+$src2$$reg );
2832 // CALL directly to the runtime
2833 MacroAssembler _masm(&cbuf);
2834 cbuf.set_insts_mark();
2835 emit_opcode(cbuf,0xE8); // Call into runtime
2836 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2837 __ post_call_nop();
2838 // Restore stack
2839 emit_opcode(cbuf, 0x83); // add SP, #framesize
2840 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2841 emit_d8(cbuf, 4*4);
2842 %}
2843
2844 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2845 // MOV $tmp,$src.lo
2846 emit_opcode(cbuf, 0x8B);
2847 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2848 // OR $tmp,$src.hi
2849 emit_opcode(cbuf, 0x0B);
2850 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
2851 %}
2852
2853 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2854 // CMP $src1.lo,$src2.lo
2855 emit_opcode( cbuf, 0x3B );
2856 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2857 // JNE,s skip
2858 emit_cc(cbuf, 0x70, 0x5);
2859 emit_d8(cbuf,2);
2860 // CMP $src1.hi,$src2.hi
2861 emit_opcode( cbuf, 0x3B );
2862 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
2863 %}
2864
2865 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2866 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2867 emit_opcode( cbuf, 0x3B );
2868 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2869 // MOV $tmp,$src1.hi
2870 emit_opcode( cbuf, 0x8B );
2871 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
2872 // SBB $tmp,$src2.hi\t! Compute flags for long compare
2873 emit_opcode( cbuf, 0x1B );
2874 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
2875 %}
2876
2877 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2878 // XOR $tmp,$tmp
2879 emit_opcode(cbuf,0x33); // XOR
2880 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2881 // CMP $tmp,$src.lo
2882 emit_opcode( cbuf, 0x3B );
2883 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2884 // SBB $tmp,$src.hi
2885 emit_opcode( cbuf, 0x1B );
2886 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
2887 %}
2888
2889 // Sniff, sniff... smells like Gnu Superoptimizer
2890 enc_class neg_long( eRegL dst ) %{
2891 emit_opcode(cbuf,0xF7); // NEG hi
2892 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
2893 emit_opcode(cbuf,0xF7); // NEG lo
2894 emit_rm (cbuf,0x3, 0x3, $dst$$reg );
2895 emit_opcode(cbuf,0x83); // SBB hi,0
2896 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
2897 emit_d8 (cbuf,0 );
2898 %}
2899
2900 enc_class enc_pop_rdx() %{
2901 emit_opcode(cbuf,0x5A);
2902 %}
2903
2904 enc_class enc_rethrow() %{
2905 MacroAssembler _masm(&cbuf);
2906 cbuf.set_insts_mark();
2907 emit_opcode(cbuf, 0xE9); // jmp entry
2908 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2909 runtime_call_Relocation::spec(), RELOC_IMM32 );
2910 __ post_call_nop();
2911 %}
2912
2913
2914 // Convert a double to an int. Java semantics require we do complex
2915 // manglelations in the corner cases. So we set the rounding mode to
2916 // 'zero', store the darned double down as an int, and reset the
2917 // rounding mode to 'nearest'. The hardware throws an exception which
2918 // patches up the correct value directly to the stack.
2919 enc_class DPR2I_encoding( regDPR src ) %{
2920 // Flip to round-to-zero mode. We attempted to allow invalid-op
2921 // exceptions here, so that a NAN or other corner-case value will
2922 // thrown an exception (but normal values get converted at full speed).
2923 // However, I2C adapters and other float-stack manglers leave pending
2924 // invalid-op exceptions hanging. We would have to clear them before
2925 // enabling them and that is more expensive than just testing for the
2926 // invalid value Intel stores down in the corner cases.
2927 emit_opcode(cbuf,0xD9); // FLDCW trunc
2928 emit_opcode(cbuf,0x2D);
2929 emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
2930 // Allocate a word
2931 emit_opcode(cbuf,0x83); // SUB ESP,4
2932 emit_opcode(cbuf,0xEC);
2933 emit_d8(cbuf,0x04);
2934 // Encoding assumes a double has been pushed into FPR0.
2935 // Store down the double as an int, popping the FPU stack
2936 emit_opcode(cbuf,0xDB); // FISTP [ESP]
2937 emit_opcode(cbuf,0x1C);
2938 emit_d8(cbuf,0x24);
2939 // Restore the rounding mode; mask the exception
2940 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
2941 emit_opcode(cbuf,0x2D);
2942 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2943 ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
2944 : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
2945
2946 // Load the converted int; adjust CPU stack
2947 emit_opcode(cbuf,0x58); // POP EAX
2948 emit_opcode(cbuf,0x3D); // CMP EAX,imm
2949 emit_d32 (cbuf,0x80000000); // 0x80000000
2950 emit_opcode(cbuf,0x75); // JNE around_slow_call
2951 emit_d8 (cbuf,0x07); // Size of slow_call
2952 // Push src onto stack slow-path
2953 emit_opcode(cbuf,0xD9 ); // FLD ST(i)
2954 emit_d8 (cbuf,0xC0-1+$src$$reg );
2955 // CALL directly to the runtime
2956 MacroAssembler _masm(&cbuf);
2957 cbuf.set_insts_mark();
2958 emit_opcode(cbuf,0xE8); // Call into runtime
2959 emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2960 __ post_call_nop();
2961 // Carry on here...
2962 %}
2963
2964 enc_class DPR2L_encoding( regDPR src ) %{
2965 emit_opcode(cbuf,0xD9); // FLDCW trunc
2966 emit_opcode(cbuf,0x2D);
2967 emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
2968 // Allocate a word
2969 emit_opcode(cbuf,0x83); // SUB ESP,8
2970 emit_opcode(cbuf,0xEC);
2971 emit_d8(cbuf,0x08);
2972 // Encoding assumes a double has been pushed into FPR0.
2973 // Store down the double as a long, popping the FPU stack
2974 emit_opcode(cbuf,0xDF); // FISTP [ESP]
2975 emit_opcode(cbuf,0x3C);
2976 emit_d8(cbuf,0x24);
2977 // Restore the rounding mode; mask the exception
2978 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
2979 emit_opcode(cbuf,0x2D);
2980 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2981 ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
2982 : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
2983
2984 // Load the converted int; adjust CPU stack
2985 emit_opcode(cbuf,0x58); // POP EAX
2986 emit_opcode(cbuf,0x5A); // POP EDX
2987 emit_opcode(cbuf,0x81); // CMP EDX,imm
2988 emit_d8 (cbuf,0xFA); // rdx
2989 emit_d32 (cbuf,0x80000000); // 0x80000000
2990 emit_opcode(cbuf,0x75); // JNE around_slow_call
2991 emit_d8 (cbuf,0x07+4); // Size of slow_call
2992 emit_opcode(cbuf,0x85); // TEST EAX,EAX
2993 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
2994 emit_opcode(cbuf,0x75); // JNE around_slow_call
2995 emit_d8 (cbuf,0x07); // Size of slow_call
2996 // Push src onto stack slow-path
2997 emit_opcode(cbuf,0xD9 ); // FLD ST(i)
2998 emit_d8 (cbuf,0xC0-1+$src$$reg );
2999 // CALL directly to the runtime
3000 MacroAssembler _masm(&cbuf);
3001 cbuf.set_insts_mark();
3002 emit_opcode(cbuf,0xE8); // Call into runtime
3003 emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3004 __ post_call_nop();
3005 // Carry on here...
3006 %}
3007
3008 enc_class FMul_ST_reg( eRegFPR src1 ) %{
3009 // Operand was loaded from memory into fp ST (stack top)
3010 // FMUL ST,$src /* D8 C8+i */
3011 emit_opcode(cbuf, 0xD8);
3012 emit_opcode(cbuf, 0xC8 + $src1$$reg);
3013 %}
3014
3015 enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3016 // FADDP ST,src2 /* D8 C0+i */
3017 emit_opcode(cbuf, 0xD8);
3018 emit_opcode(cbuf, 0xC0 + $src2$$reg);
3019 //could use FADDP src2,fpST /* DE C0+i */
3020 %}
3021
3022 enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3023 // FADDP src2,ST /* DE C0+i */
3024 emit_opcode(cbuf, 0xDE);
3025 emit_opcode(cbuf, 0xC0 + $src2$$reg);
3026 %}
3027
3028 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3029 // Operand has been loaded into fp ST (stack top)
3030 // FSUB ST,$src1
3031 emit_opcode(cbuf, 0xD8);
3032 emit_opcode(cbuf, 0xE0 + $src1$$reg);
3033
3034 // FDIV
3035 emit_opcode(cbuf, 0xD8);
3036 emit_opcode(cbuf, 0xF0 + $src2$$reg);
3037 %}
3038
3039 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3040 // Operand was loaded from memory into fp ST (stack top)
3041 // FADD ST,$src /* D8 C0+i */
3042 emit_opcode(cbuf, 0xD8);
3043 emit_opcode(cbuf, 0xC0 + $src1$$reg);
3044
3045 // FMUL ST,src2 /* D8 C*+i */
3046 emit_opcode(cbuf, 0xD8);
3047 emit_opcode(cbuf, 0xC8 + $src2$$reg);
3048 %}
3049
3050
3051 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3052 // Operand was loaded from memory into fp ST (stack top)
3053 // FADD ST,$src /* D8 C0+i */
3054 emit_opcode(cbuf, 0xD8);
3055 emit_opcode(cbuf, 0xC0 + $src1$$reg);
3056
3057 // FMULP src2,ST /* DE C8+i */
3058 emit_opcode(cbuf, 0xDE);
3059 emit_opcode(cbuf, 0xC8 + $src2$$reg);
3060 %}
3061
3062 // Atomically load the volatile long
3063 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3064 emit_opcode(cbuf,0xDF);
3065 int rm_byte_opcode = 0x05;
3066 int base = $mem$$base;
3067 int index = $mem$$index;
3068 int scale = $mem$$scale;
3069 int displace = $mem$$disp;
3070 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3071 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3072 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3073 %}
3074
3075 // Volatile Store Long. Must be atomic, so move it into
3076 // the FP TOS and then do a 64-bit FIST. Has to probe the
3077 // target address before the store (for null-ptr checks)
3078 // so the memory operand is used twice in the encoding.
3079 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3080 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3081 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop
3082 emit_opcode(cbuf,0xDF);
3083 int rm_byte_opcode = 0x07;
3084 int base = $mem$$base;
3085 int index = $mem$$index;
3086 int scale = $mem$$scale;
3087 int displace = $mem$$disp;
3088 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3089 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3090 %}
3091
3092 %}
3093
3094
3095 //----------FRAME--------------------------------------------------------------
3096 // Definition of frame structure and management information.
3097 //
3098 // S T A C K L A Y O U T Allocators stack-slot number
3099 // | (to get allocators register number
3100 // G Owned by | | v add OptoReg::stack0())
3101 // r CALLER | |
3102 // o | +--------+ pad to even-align allocators stack-slot
3103 // w V | pad0 | numbers; owned by CALLER
3104 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
3105 // h ^ | in | 5
3106 // | | args | 4 Holes in incoming args owned by SELF
3107 // | | | | 3
3108 // | | +--------+
3109 // V | | old out| Empty on Intel, window on Sparc
3110 // | old |preserve| Must be even aligned.
3111 // | SP-+--------+----> Matcher::_old_SP, even aligned
3112 // | | in | 3 area for Intel ret address
3113 // Owned by |preserve| Empty on Sparc.
3114 // SELF +--------+
3115 // | | pad2 | 2 pad to align old SP
3116 // | +--------+ 1
3117 // | | locks | 0
3118 // | +--------+----> OptoReg::stack0(), even aligned
3119 // | | pad1 | 11 pad to align new SP
3120 // | +--------+
3121 // | | | 10
3122 // | | spills | 9 spills
3123 // V | | 8 (pad0 slot for callee)
3124 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
3125 // ^ | out | 7
3126 // | | args | 6 Holes in outgoing args owned by CALLEE
3127 // Owned by +--------+
3128 // CALLEE | new out| 6 Empty on Intel, window on Sparc
3129 // | new |preserve| Must be even-aligned.
3130 // | SP-+--------+----> Matcher::_new_SP, even aligned
3131 // | | |
3132 //
3133 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
3134 // known from SELF's arguments and the Java calling convention.
3135 // Region 6-7 is determined per call site.
3136 // Note 2: If the calling convention leaves holes in the incoming argument
3137 // area, those holes are owned by SELF. Holes in the outgoing area
3138 // are owned by the CALLEE. Holes should not be necessary in the
3139 // incoming area, as the Java calling convention is completely under
3140 // the control of the AD file. Doubles can be sorted and packed to
3141 // avoid holes. Holes in the outgoing arguments may be necessary for
3142 // varargs C calling conventions.
3143 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
3144 // even aligned with pad0 as needed.
3145 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
3146 // region 6-11 is even aligned; it may be padded out more so that
3147 // the region from SP to FP meets the minimum stack alignment.
3148
3149 frame %{
3150 // These three registers define part of the calling convention
3151 // between compiled code and the interpreter.
3152 inline_cache_reg(EAX); // Inline Cache Register
3153
3154 // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3155 cisc_spilling_operand_name(indOffset32);
3156
3157 // Number of stack slots consumed by locking an object
3158 sync_stack_slots(1);
3159
3160 // Compiled code's Frame Pointer
3161 frame_pointer(ESP);
3162 // Interpreter stores its frame pointer in a register which is
3163 // stored to the stack by I2CAdaptors.
3164 // I2CAdaptors convert from interpreted java to compiled java.
3165 interpreter_frame_pointer(EBP);
3166
3167 // Stack alignment requirement
3168 // Alignment size in bytes (128-bit -> 16 bytes)
3169 stack_alignment(StackAlignmentInBytes);
3170
3171 // Number of outgoing stack slots killed above the out_preserve_stack_slots
3172 // for calls to C. Supports the var-args backing area for register parms.
3173 varargs_C_out_slots_killed(0);
3174
3175 // The after-PROLOG location of the return address. Location of
3176 // return address specifies a type (REG or STACK) and a number
3177 // representing the register number (i.e. - use a register name) or
3178 // stack slot.
3179 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3180 // Otherwise, it is above the locks and verification slot and alignment word
3181 return_addr(STACK - 1 +
3182 align_up((Compile::current()->in_preserve_stack_slots() +
3183 Compile::current()->fixed_slots()),
3184 stack_alignment_in_slots()));
3185
3186 // Location of C & interpreter return values
3187 c_return_value %{
3188 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3189 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
3190 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3191
3192 // in SSE2+ mode we want to keep the FPU stack clean so pretend
3193 // that C functions return float and double results in XMM0.
3194 if( ideal_reg == Op_RegD && UseSSE>=2 )
3195 return OptoRegPair(XMM0b_num,XMM0_num);
3196 if( ideal_reg == Op_RegF && UseSSE>=2 )
3197 return OptoRegPair(OptoReg::Bad,XMM0_num);
3198
3199 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3200 %}
3201
3202 // Location of return values
3203 return_value %{
3204 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3205 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
3206 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3207 if( ideal_reg == Op_RegD && UseSSE>=2 )
3208 return OptoRegPair(XMM0b_num,XMM0_num);
3209 if( ideal_reg == Op_RegF && UseSSE>=1 )
3210 return OptoRegPair(OptoReg::Bad,XMM0_num);
3211 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3212 %}
3213
3214 %}
3215
3216 //----------ATTRIBUTES---------------------------------------------------------
3217 //----------Operand Attributes-------------------------------------------------
3218 op_attrib op_cost(0); // Required cost attribute
3219
3220 //----------Instruction Attributes---------------------------------------------
3221 ins_attrib ins_cost(100); // Required cost attribute
3222 ins_attrib ins_size(8); // Required size attribute (in bits)
3223 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3224 // non-matching short branch variant of some
3225 // long branch?
3226 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2)
3227 // specifies the alignment that some part of the instruction (not
3228 // necessarily the start) requires. If > 1, a compute_padding()
3229 // function must be provided for the instruction
3230
3231 //----------OPERANDS-----------------------------------------------------------
3232 // Operand definitions must precede instruction definitions for correct parsing
3233 // in the ADLC because operands constitute user defined types which are used in
3234 // instruction definitions.
3235
3236 //----------Simple Operands----------------------------------------------------
3237 // Immediate Operands
3238 // Integer Immediate
3239 operand immI() %{
3240 match(ConI);
3241
3242 op_cost(10);
3243 format %{ %}
3244 interface(CONST_INTER);
3245 %}
3246
3247 // Constant for test vs zero
3248 operand immI_0() %{
3249 predicate(n->get_int() == 0);
3250 match(ConI);
3251
3252 op_cost(0);
3253 format %{ %}
3254 interface(CONST_INTER);
3255 %}
3256
3257 // Constant for increment
3258 operand immI_1() %{
3259 predicate(n->get_int() == 1);
3260 match(ConI);
3261
3262 op_cost(0);
3263 format %{ %}
3264 interface(CONST_INTER);
3265 %}
3266
3267 // Constant for decrement
3268 operand immI_M1() %{
3269 predicate(n->get_int() == -1);
3270 match(ConI);
3271
3272 op_cost(0);
3273 format %{ %}
3274 interface(CONST_INTER);
3275 %}
3276
3277 // Valid scale values for addressing modes
3278 operand immI2() %{
3279 predicate(0 <= n->get_int() && (n->get_int() <= 3));
3280 match(ConI);
3281
3282 format %{ %}
3283 interface(CONST_INTER);
3284 %}
3285
3286 operand immI8() %{
3287 predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3288 match(ConI);
3289
3290 op_cost(5);
3291 format %{ %}
3292 interface(CONST_INTER);
3293 %}
3294
3295 operand immU8() %{
3296 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
3297 match(ConI);
3298
3299 op_cost(5);
3300 format %{ %}
3301 interface(CONST_INTER);
3302 %}
3303
3304 operand immI16() %{
3305 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3306 match(ConI);
3307
3308 op_cost(10);
3309 format %{ %}
3310 interface(CONST_INTER);
3311 %}
3312
3313 // Int Immediate non-negative
3314 operand immU31()
3315 %{
3316 predicate(n->get_int() >= 0);
3317 match(ConI);
3318
3319 op_cost(0);
3320 format %{ %}
3321 interface(CONST_INTER);
3322 %}
3323
3324 // Constant for long shifts
3325 operand immI_32() %{
3326 predicate( n->get_int() == 32 );
3327 match(ConI);
3328
3329 op_cost(0);
3330 format %{ %}
3331 interface(CONST_INTER);
3332 %}
3333
3334 operand immI_1_31() %{
3335 predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3336 match(ConI);
3337
3338 op_cost(0);
3339 format %{ %}
3340 interface(CONST_INTER);
3341 %}
3342
3343 operand immI_32_63() %{
3344 predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3345 match(ConI);
3346 op_cost(0);
3347
3348 format %{ %}
3349 interface(CONST_INTER);
3350 %}
3351
3352 operand immI_2() %{
3353 predicate( n->get_int() == 2 );
3354 match(ConI);
3355
3356 op_cost(0);
3357 format %{ %}
3358 interface(CONST_INTER);
3359 %}
3360
3361 operand immI_3() %{
3362 predicate( n->get_int() == 3 );
3363 match(ConI);
3364
3365 op_cost(0);
3366 format %{ %}
3367 interface(CONST_INTER);
3368 %}
3369
3370 operand immI_4()
3371 %{
3372 predicate(n->get_int() == 4);
3373 match(ConI);
3374
3375 op_cost(0);
3376 format %{ %}
3377 interface(CONST_INTER);
3378 %}
3379
3380 operand immI_8()
3381 %{
3382 predicate(n->get_int() == 8);
3383 match(ConI);
3384
3385 op_cost(0);
3386 format %{ %}
3387 interface(CONST_INTER);
3388 %}
3389
3390 // Pointer Immediate
3391 operand immP() %{
3392 match(ConP);
3393
3394 op_cost(10);
3395 format %{ %}
3396 interface(CONST_INTER);
3397 %}
3398
3399 // NULL Pointer Immediate
3400 operand immP0() %{
3401 predicate( n->get_ptr() == 0 );
3402 match(ConP);
3403 op_cost(0);
3404
3405 format %{ %}
3406 interface(CONST_INTER);
3407 %}
3408
3409 // Long Immediate
3410 operand immL() %{
3411 match(ConL);
3412
3413 op_cost(20);
3414 format %{ %}
3415 interface(CONST_INTER);
3416 %}
3417
3418 // Long Immediate zero
3419 operand immL0() %{
3420 predicate( n->get_long() == 0L );
3421 match(ConL);
3422 op_cost(0);
3423
3424 format %{ %}
3425 interface(CONST_INTER);
3426 %}
3427
3428 // Long Immediate zero
3429 operand immL_M1() %{
3430 predicate( n->get_long() == -1L );
3431 match(ConL);
3432 op_cost(0);
3433
3434 format %{ %}
3435 interface(CONST_INTER);
3436 %}
3437
3438 // Long immediate from 0 to 127.
3439 // Used for a shorter form of long mul by 10.
3440 operand immL_127() %{
3441 predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3442 match(ConL);
3443 op_cost(0);
3444
3445 format %{ %}
3446 interface(CONST_INTER);
3447 %}
3448
3449 // Long Immediate: low 32-bit mask
3450 operand immL_32bits() %{
3451 predicate(n->get_long() == 0xFFFFFFFFL);
3452 match(ConL);
3453 op_cost(0);
3454
3455 format %{ %}
3456 interface(CONST_INTER);
3457 %}
3458
3459 // Long Immediate: low 32-bit mask
3460 operand immL32() %{
3461 predicate(n->get_long() == (int)(n->get_long()));
3462 match(ConL);
3463 op_cost(20);
3464
3465 format %{ %}
3466 interface(CONST_INTER);
3467 %}
3468
3469 //Double Immediate zero
3470 operand immDPR0() %{
3471 // Do additional (and counter-intuitive) test against NaN to work around VC++
3472 // bug that generates code such that NaNs compare equal to 0.0
3473 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3474 match(ConD);
3475
3476 op_cost(5);
3477 format %{ %}
3478 interface(CONST_INTER);
3479 %}
3480
3481 // Double Immediate one
3482 operand immDPR1() %{
3483 predicate( UseSSE<=1 && n->getd() == 1.0 );
3484 match(ConD);
3485
3486 op_cost(5);
3487 format %{ %}
3488 interface(CONST_INTER);
3489 %}
3490
3491 // Double Immediate
3492 operand immDPR() %{
3493 predicate(UseSSE<=1);
3494 match(ConD);
3495
3496 op_cost(5);
3497 format %{ %}
3498 interface(CONST_INTER);
3499 %}
3500
3501 operand immD() %{
3502 predicate(UseSSE>=2);
3503 match(ConD);
3504
3505 op_cost(5);
3506 format %{ %}
3507 interface(CONST_INTER);
3508 %}
3509
3510 // Double Immediate zero
3511 operand immD0() %{
3512 // Do additional (and counter-intuitive) test against NaN to work around VC++
3513 // bug that generates code such that NaNs compare equal to 0.0 AND do not
3514 // compare equal to -0.0.
3515 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3516 match(ConD);
3517
3518 format %{ %}
3519 interface(CONST_INTER);
3520 %}
3521
// Float Immediate zero (x87 path, UseSSE == 0)
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one (x87 path, UseSSE == 0)
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate, any value (x87 path, UseSSE == 0)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate, any value (SSE path)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero. Zero and not -0.0
operand immF0() %{
  // jint_cast compares the raw bit pattern, so -0.0 (sign bit set) is
  // rejected here even though -0.0 == 0.0F is true in FP arithmetic.
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3571
// Immediates for special shifts (sign extend)

// Constants for increment
// Shift count of 16, used by the short sign-extension idiom (shl 16 / sar 16).
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Shift count of 24, used by the byte sign-extension idiom (shl 24 / sar 24).
operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3608
// Vector mask (opmask "k") register operands.
// kReg allocates from the full vectmask_reg class; the kReg_K<n> variants
// below each pin allocation to one specific mask register.
operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K1()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K1));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K2()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K2));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K3()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K3));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K4()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K4));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K5()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K5));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K6()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K6));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K7()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K7));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}
3674
// Register Operands
// Integer Register
// General-purpose integer register; the match() list below names the
// more-specific operands this one subsumes during matching.
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
// Restricted to the byte-addressable registers (EAX/EBX/ECX/EDX per the
// int_x_reg class and the match list below).
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}
3704
// Special Registers
// Integer operand pinned to EAX.
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
// Integer operand pinned to EBX.
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Integer operand pinned to ECX.
operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

// Integer operand pinned to EDX.
operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

// Integer operand pinned to EDI.
operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register excluding EAX ("nax"); the match list covers the
// remaining specific registers.
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding EAX and EDX ("nadx").
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding ECX ("ncx").
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
// Integer operand pinned to ESI.
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}
3798
// Pointer Register
// Any register usable as a pointer (any_reg class).
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

// General pointer register (int_reg class).
operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Same register class as eRegP; alternate name kept for rule symmetry
// with the 64-bit AD file's rRegP.
operand rRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX ("nax").
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX and EBX ("nabx").
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register from the p_reg class.
operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}
3887
// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

// Pointer operand pinned to EDX.
operand eDXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  format %{ "EDX" %}
  interface(REG_INTER);
%}

// Pointer operand pinned to ESI.
operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}
3934
// Long (64-bit) value held in a register pair (long_reg class).
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

// Long pinned to the EDX:EAX pair (required by mul/div instructions).
operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

// Long pinned to the EBX:ECX pair.
operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Long pinned to the EBP:EDI pair.
operand eBDPRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebpd_reg));
  match(reg);

  format %{ "EBP:EDI" %}
  interface(REG_INTER);
%}
// Special case for integer high multiply
// Only the low half (EAX) of the EDX:EAX pair is meaningful here.
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}
3975
// Flags register, used as output of compare instructions
operand rFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// Flags for FP compares needing no unordered fixup; predicate(false)
// means this operand is never chosen by the matcher directly — rules
// name it explicitly.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}
4051
// Float register operands
// x87 FPU double register (used only when UseSSE < 2).
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

// x87 double pinned to FPR1 (top-of-stack register class fp_dbl_reg0).
operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// x87 double pinned to FPR2.
operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

// x87 double in any stack register except FPR1.
operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
// x87 FPU float register (used only when UseSSE < 2).
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
// x87 float pinned to FPR1.
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}
4105
// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// NOTE(review): currently identical to regF (same predicate and register
// class); kept as a separate operand so rules can distinguish legacy-only
// encodings.
operand legRegF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
// No UseSSE predicate here — allocation class float_reg_vl alone gates use.
operand vlRegF() %{
   constraint(ALLOC_IN_RC(float_reg_vl));
   match(RegF);

   format %{ %}
   interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand legRegD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

operand vlRegD() %{
   constraint(ALLOC_IN_RC(double_reg_vl));
   match(RegD);

   format %{ %}
   interface(REG_INTER);
%}
4157
//----------Memory Operands----------------------------------------------------
// In the MEMORY_INTER encodings below, index(0x4) means "no index register"
// (0x4 = ESP, which cannot be used as an index in x86 addressing).

// Direct Memory Operand
// Absolute address taken from a pointer constant; base(0xFFFFFFFF)
// encodes "no base register".
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
// [reg] — base register only, no displacement.
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
// [reg + imm8] — 8-bit displacement form.
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// [reg + imm32] — 32-bit displacement form.
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// Pointer-constant base plus integer register: matches (AddP off reg),
// i.e. the constant is the AddP's base input.
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
// [reg + ireg + imm32]
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
// [reg + ireg] — base plus unscaled index.
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}
4252
4253 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
4255 // // -------------------------------------------------------------------------
4256 // // Scaled Memory Operands
4257 // // Indirect Memory Times Scale Plus Offset Operand
4258 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4259 // match(AddP off (LShiftI ireg scale));
4260 //
4261 // op_cost(10);
4262 // format %{"[$off + $ireg << $scale]" %}
4263 // interface(MEMORY_INTER) %{
4264 // base(0x4);
4265 // index($ireg);
4266 // scale($scale);
4267 // disp($off);
4268 // %}
4269 // %}
4270
// Indirect Memory Times Scale Plus Index Register
// [reg + (ireg << scale)] — scale is a 2-bit shift count (immI2).
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// [reg + off + (ireg << scale)] — full SIB addressing form.
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}
4298
4299 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
4301 // the first word of the long. If the load-long destination overlaps with
4302 // registers used in the addressing expression, the 2nd half will be loaded
4303 // from a clobbered address. Fix this by requiring that load-long use
4304 // address registers that do not overlap with the load-long target.
4305
// load-long support
// Address register for load-long, pinned to ESI so it cannot overlap the
// long destination pair (see the comment above on clobbered addresses).
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
// [ESI] — base register only; index(0x4) encodes "no index".
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// [ESI + imm32]
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4344
4345
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
// All stack-slot operands address [ESP + disp]: base 0x4 encodes ESP and
// index 0x4 encodes "no index register".
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
4409
4410 //----------Conditional Branch Operands----------------------------------------
4411 // Comparison Op - This is the operation of the comparison, and is limited to
4412 // the following set of codes:
4413 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4414 //
4415 // Other attributes of the comparison, such as unsignedness, are specified
4416 // by the comparison instruction that sets a condition code flags register.
4417 // That result is represented by a flags operand whose subtype is appropriate
4418 // to the unsignedness (etc.) of the comparison.
4419 //
4420 // Later, the instruction which matches both the Comparison Op (a Bool) and
4421 // the flags (produced by the Cmp) specifies the coding of the comparison op
4422 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4423
// Comparison Code
// Signed integer compare; the hex values are the x86 condition-code
// encodings used in Jcc/SETcc/CMOVcc opcodes.
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
// Restricted by predicate to the four orderings whose unordered result
// already lands on the correct side.
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
// eq/ne need an extra branch to handle the unordered (PF=1) case.
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
// Values here are FCMOV opcode fragments, not Jcc condition codes.
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
// Signed compare with the sense of each ordering swapped (operands
// commuted): less emits "g", greater emits "l", etc.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares
// Unsigned counterpart of cmpOp_commute.
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
4551
4552 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
4554 // instruction definitions by not requiring the AD writer to specify separate
4555 // instructions for every form of operand when the instruction accepts
4556 // multiple operand types with the same basic encoding and format. The classic
4557 // case of this is memory operands.
4558
opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
// Hence long_memory is the same list as memory minus indOffset32X,
// whose displacement is a pointer (oop) constant.
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);
4567
4568
4569 //----------PIPELINE-----------------------------------------------------------
4570 // Rules which define the behavior of the target architectures pipeline.
4571 pipeline %{
4572
//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are of variable size
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
4584
//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
// Six stages, S0 (decode) through S5; pipe_class rules below reference them.
pipe_desc(S0, S1, S2, S3, S4, S5);
4603
4604 //----------PIPELINE CLASSES---------------------------------------------------
4605 // Pipeline Classes describe the stages in which input and output are
4606 // referenced by the hardware pipeline.
4607
4608 // Naming convention: ialu or fpu
4609 // Then: _reg
4610 // Then: _reg if there is a 2nd register
4611 // Then: _long if it's a pair of instructions implementing a long
4612 // Then: _fat if it requires the big decoder
4613 // Or: _mem if it requires the big decoder and a memory unit.
4614
// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation
// NOTE(review): despite the name, src is a memory operand here — confirm
// against the instructions that use this class.
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}
4715
// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory
// Immediate-source variant: no register source to model.
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation
// ALU0-only: used for mul/div-style ops (see resources comment above).
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation
// Compare-style: writes only the flags register.
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}
4793
// Conditional move reg-reg
// Four-instruction compare-and-move sequence.
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}
4839
4840 // Float reg-reg operation
4841 pipe_class fpu_reg(regDPR dst) %{
4842 instruction_count(2);
4843 dst : S3(read);
4844 DECODE : S0(2); // any 2 decoders
4845 FPU : S3;
4846 %}
4847
4848 // Float reg-reg operation
4849 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4850 instruction_count(2);
4851 dst : S4(write);
4852 src : S3(read);
4853 DECODE : S0(2); // any 2 decoders
4854 FPU : S3;
4855 %}
4856
4857 // Float reg-reg-reg operation
4858 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4859 instruction_count(3);
4860 dst : S4(write);
4861 src1 : S3(read);
4862 src2 : S3(read);
4863 DECODE : S0(3); // any 3 decoders
4864 FPU : S3(2);
4865 %}
4866
4867 // Float reg-reg-reg-reg operation
4868 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4869 instruction_count(4);
4870 dst : S4(write);
4871 src1 : S3(read);
4872 src2 : S3(read);
4873 src3 : S3(read);
4874 DECODE : S0(4); // any 4 decoders
4875 FPU : S3(2);
4876 %}
4877
4878 // Float reg-mem-reg-reg operation
4879 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4880 instruction_count(4);
4881 dst : S4(write);
4882 src1 : S3(read);
4883 src2 : S3(read);
4884 src3 : S3(read);
4885 DECODE : S1(3); // any 3 decoders
4886 D0 : S0; // Big decoder only
4887 FPU : S3(2);
4888 MEM : S3; // any mem
4889 %}
4890
4891 // Float reg-mem operation
4892 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4893 instruction_count(2);
4894 dst : S5(write);
4895 mem : S3(read);
4896 D0 : S0; // big decoder only
4897 DECODE : S1; // any decoder for FPU POP
4898 FPU : S4;
4899 MEM : S3; // any mem
4900 %}
4901
4902 // Float reg-reg-mem operation
4903 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4904 instruction_count(3);
4905 dst : S5(write);
4906 src1 : S3(read);
4907 mem : S3(read);
4908 D0 : S0; // big decoder only
4909 DECODE : S1(2); // any 2 decoders, one for the FPU POP
4910 FPU : S4;
4911 MEM : S3; // any mem
4912 %}
4913
4914 // Float mem-reg operation
4915 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4916 instruction_count(2);
4917 src : S5(read);
4918 mem : S3(read);
4919 DECODE : S0; // any decoder for FPU PUSH
4920 D0 : S1; // big decoder only
4921 FPU : S4;
4922 MEM : S3; // any mem
4923 %}
4924
4925 // Float mem-reg-reg operation
4926 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4927 instruction_count(3);
4928 src1 : S3(read);
4929 src2 : S3(read);
4930 mem : S3(read);
4931 DECODE : S0(2); // any 2 decoders, one for the FPU PUSH
4932 D0 : S1; // big decoder only
4933 FPU : S4;
4934 MEM : S3; // any mem
4935 %}
4936
4937 // Float mem-reg-mem operation
4938 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4939 instruction_count(3);
4940 src1 : S3(read);
4941 src2 : S3(read);
4942 mem : S4(read);
4943 DECODE : S0; // any decoder for FPU PUSH
4944 D0 : S0(2); // big decoder only
4945 FPU : S4;
4946 MEM : S3(2); // any mem
4947 %}
4948
4949 // Float mem-mem operation
4950 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4951 instruction_count(2);
4952 src1 : S3(read);
4953 dst : S4(read);
4954 D0 : S0(2); // big decoder only
4955 MEM : S3(2); // any mem
4956 %}
4957
4958 // Float mem-mem-mem operation
4959 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4960 instruction_count(3);
4961 src1 : S3(read);
4962 src2 : S3(read);
4963 dst : S4(read);
4964 D0 : S0(3); // big decoder only
4965 FPU : S4;
4966 MEM : S3(3); // any mem
4967 %}
4968
4969 // Float mem-reg-constant operation
4970 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4971 instruction_count(3);
4972 src1 : S4(read);
4973 mem : S4(read);
4974 DECODE : S0; // any decoder for FPU PUSH
4975 D0 : S0(2); // big decoder only
4976 FPU : S4;
4977 MEM : S3(2); // any mem
4978 %}
4979
4980 // Float load constant
4981 pipe_class fpu_reg_con(regDPR dst) %{
4982 instruction_count(2);
4983 dst : S5(write);
4984 D0 : S0; // big decoder only for the load
4985 DECODE : S1; // any decoder for FPU POP
4986 FPU : S4;
4987 MEM : S3; // any mem
4988 %}
4989
4990 // Float load constant
4991 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
4992 instruction_count(3);
4993 dst : S5(write);
4994 src : S3(read);
4995 D0 : S0; // big decoder only for the load
4996 DECODE : S1(2); // any 2 decoders, one for the FPU POP
4997 FPU : S4;
4998 MEM : S3; // any mem
4999 %}
4995
4996 // UnConditional branch
4997 pipe_class pipe_jmp( label labl ) %{
4998 single_instruction;
4999 BR : S3;
5000 %}
5001
5002 // Conditional branch
5003 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5004 single_instruction;
5005 cr : S1(read);
5006 BR : S3;
5007 %}
5008
5009 // Allocation / compare-and-exchange idiom; serializes the pipeline
5010 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5011 instruction_count(1); force_serialization;
5012 fixed_latency(6);
5013 heap_ptr : S3(read);
5014 DECODE : S0(3);
5015 D0 : S2;
5016 MEM : S3;
5017 ALU : S3(2);
5018 dst : S5(write);
5019 BR : S5;
5020 %}
5021
5022 // Generic big/slow expanded idiom; catch-all for multi-bundle expansions
5023 pipe_class pipe_slow( ) %{
5024 instruction_count(10); multiple_bundles; force_serialization;
5025 fixed_latency(100);
5026 D0 : S0(2);
5027 MEM : S3(2);
5028 %}
5029
5030 // The real do-nothing guy
5031 pipe_class empty( ) %{
5032 instruction_count(0);
5033 %}
5034
5035 // Define the class for the Nop node
5036 define %{
5037 MachNop = empty;
5038 %}
5039
5040 %}
5041
5042 //----------INSTRUCTIONS-------------------------------------------------------
5043 //
5044 // match -- States which machine-independent subtree may be replaced
5045 // by this instruction.
5046 // ins_cost -- The estimated cost of this instruction is used by instruction
5047 // selection to identify a minimum cost tree of machine
5048 // instructions that matches a tree of machine-independent
5049 // instructions.
5050 // format -- A string providing the disassembly for this instruction.
5051 // The value of an instruction's operand may be inserted
5052 // by referring to it with a '$' prefix.
5053 // opcode -- Three instruction opcodes may be provided. These are referred
5054 // to within an encode class as $primary, $secondary, and $tertiary
5055 // respectively. The primary opcode is commonly used to
5056 // indicate the type of machine instruction, while secondary
5057 // and tertiary are often used for prefix options or addressing
5058 // modes.
5059 // ins_encode -- A list of encode classes with parameters. The encode class
5060 // name must have been defined in an 'enc_class' specification
5061 // in the encode section of the architecture description.
5062
5063 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// These instructs only change the register class seen by the allocator; they
// must never survive to code emission, hence ShouldNotReachHere() in each.
5064 // Float move into a legacy (pre-AVX512) register class
5065 instruct MoveF2LEG(legRegF dst, regF src) %{
5066 match(Set dst src);
5067 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
5068 ins_encode %{
5069 ShouldNotReachHere();
5070 %}
5071 ins_pipe( fpu_reg_reg );
5072 %}
5073
5074 // Float move out of a legacy register class
5075 instruct MoveLEG2F(regF dst, legRegF src) %{
5076 match(Set dst src);
5077 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
5078 ins_encode %{
5079 ShouldNotReachHere();
5080 %}
5081 ins_pipe( fpu_reg_reg );
5082 %}
5083
5084 // Float move into a vector-length register class
5085 instruct MoveF2VL(vlRegF dst, regF src) %{
5086 match(Set dst src);
5087 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
5088 ins_encode %{
5089 ShouldNotReachHere();
5090 %}
5091 ins_pipe( fpu_reg_reg );
5092 %}
5093
5094 // Float move out of a vector-length register class
5095 instruct MoveVL2F(regF dst, vlRegF src) %{
5096 match(Set dst src);
5097 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
5098 ins_encode %{
5099 ShouldNotReachHere();
5100 %}
5101 ins_pipe( fpu_reg_reg );
5102 %}
5103
5104
5105
5106 // Double move into a legacy register class
5107 instruct MoveD2LEG(legRegD dst, regD src) %{
5108 match(Set dst src);
5109 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
5110 ins_encode %{
5111 ShouldNotReachHere();
5112 %}
5113 ins_pipe( fpu_reg_reg );
5114 %}
5115
5116 // Double move out of a legacy register class
5117 instruct MoveLEG2D(regD dst, legRegD src) %{
5118 match(Set dst src);
5119 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
5120 ins_encode %{
5121 ShouldNotReachHere();
5122 %}
5123 ins_pipe( fpu_reg_reg );
5124 %}
5125
5126 // Double move into a vector-length register class
5127 instruct MoveD2VL(vlRegD dst, regD src) %{
5128 match(Set dst src);
5129 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
5130 ins_encode %{
5131 ShouldNotReachHere();
5132 %}
5133 ins_pipe( fpu_reg_reg );
5134 %}
5135
5136 // Double move out of a vector-length register class
5137 instruct MoveVL2D(regD dst, vlRegD src) %{
5138 match(Set dst src);
5139 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
5140 ins_encode %{
5141 ShouldNotReachHere();
5142 %}
5143 ins_pipe( fpu_reg_reg );
5144 %}
5145
5146 //----------BSWAP-Instruction--------------------------------------------------
5147 instruct bytes_reverse_int(rRegI dst) %{
5148 match(Set dst (ReverseBytesI dst));
5149
5150 format %{ "BSWAP $dst" %}
5151 opcode(0x0F, 0xC8);
5152 ins_encode( OpcP, OpcSReg(dst) );
5153 ins_pipe( ialu_reg );
5154 %}
5155
5156 // Long byte reverse: reverse each 32-bit half, then swap the halves.
5157 instruct bytes_reverse_long(eRegL dst) %{
5158 match(Set dst (ReverseBytesL dst));
5159
5160 format %{ "BSWAP $dst.lo\n\t"
5161 "BSWAP $dst.hi\n\t"
5162 "XCHG $dst.lo $dst.hi" %}
5163
5164 ins_cost(125);
5165 ins_encode( bswap_long_bytes(dst) );
5166 ins_pipe( ialu_reg_reg);
5167 %}
5168
5169 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5170 match(Set dst (ReverseBytesUS dst));
5171 effect(KILL cr);
5172
5173 format %{ "BSWAP $dst\n\t"
5174 "SHR $dst,16\n\t" %}
5175 ins_encode %{
5176 // BSWAP moves the two interesting bytes into the high half;
5177 // logical shift brings them down, zero-extending (unsigned result).
5178 __ bswapl($dst$$Register);
5179 __ shrl($dst$$Register, 16);
5180 %}
5181 ins_pipe( ialu_reg );
5182 %}
5183
5184 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5185 match(Set dst (ReverseBytesS dst));
5186 effect(KILL cr);
5187
5188 format %{ "BSWAP $dst\n\t"
5189 "SAR $dst,16\n\t" %}
5190 ins_encode %{
5191 // As above, but arithmetic shift sign-extends the 16-bit result.
5192 __ bswapl($dst$$Register);
5193 __ sarl($dst$$Register, 16);
5194 %}
5195 ins_pipe( ialu_reg );
5196 %}
5193
5194
5195 //---------- Zeros Count Instructions ------------------------------------------
5196
5197 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5198 predicate(UseCountLeadingZerosInstruction);
5199 match(Set dst (CountLeadingZerosI src));
5200 effect(KILL cr);
5201
5202 format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
5203 ins_encode %{
5204 __ lzcntl($dst$$Register, $src$$Register);
5205 %}
5206 ins_pipe(ialu_reg);
5207 %}
5208
5209 // Fallback when LZCNT is unavailable: clz(x) = 31 - bsr(x), and 32 for x == 0.
5210 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5211 predicate(!UseCountLeadingZerosInstruction);
5212 match(Set dst (CountLeadingZerosI src));
5213 effect(KILL cr);
5214
5215 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
5216 "JNZ skip\n\t"
5217 "MOV $dst, -1\n"
5218 "skip:\n\t"
5219 "NEG $dst\n\t"
5220 "ADD $dst, 31" %}
5221 ins_encode %{
5222 Register Rdst = $dst$$Register;
5223 Register Rsrc = $src$$Register;
5224 Label skip;
5225 __ bsrl(Rdst, Rsrc);
5226 __ jccb(Assembler::notZero, skip);
5227 __ movl(Rdst, -1); // src == 0: pretend bsr returned -1, giving 31-(-1) = 32
5228 __ bind(skip);
5229 __ negl(Rdst);
5230 __ addl(Rdst, BitsPerInt - 1); // dst = 31 - bsr(src)
5231 %}
5232 ins_pipe(ialu_reg);
5233 %}
5234
5235 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5236 predicate(UseCountLeadingZerosInstruction);
5237 match(Set dst (CountLeadingZerosL src));
5238 effect(TEMP dst, KILL cr);
5239
5240 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
5241 "JNC done\n\t"
5242 "LZCNT $dst, $src.lo\n\t"
5243 "ADD $dst, 32\n"
5244 "done:" %}
5245 ins_encode %{
5246 Register Rdst = $dst$$Register;
5247 Register Rsrc = $src$$Register;
5248 Label done;
5249 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5250 __ jccb(Assembler::carryClear, done); // LZCNT sets CF iff its source was zero
5251 __ lzcntl(Rdst, Rsrc); // high word zero: count in low word, plus 32
5252 __ addl(Rdst, BitsPerInt);
5253 __ bind(done);
5254 %}
5255 ins_pipe(ialu_reg);
5256 %}
5257
5258 // Fallback when LZCNT is unavailable: clz(x) = 63 - bsr(x), and 64 for x == 0.
5259 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5260 predicate(!UseCountLeadingZerosInstruction);
5261 match(Set dst (CountLeadingZerosL src));
5262 effect(TEMP dst, KILL cr);
5263
5264 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
5265 "JZ msw_is_zero\n\t"
5266 "ADD $dst, 32\n\t"
5267 "JMP not_zero\n"
5268 "msw_is_zero:\n\t"
5269 "BSR $dst, $src.lo\n\t"
5270 "JNZ not_zero\n\t"
5271 "MOV $dst, -1\n"
5272 "not_zero:\n\t"
5273 "NEG $dst\n\t"
5274 "ADD $dst, 63\n" %}
5275 ins_encode %{
5276 Register Rdst = $dst$$Register;
5277 Register Rsrc = $src$$Register;
5278 Label msw_is_zero;
5279 Label not_zero;
5280 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5281 __ jccb(Assembler::zero, msw_is_zero);
5282 __ addl(Rdst, BitsPerInt); // bit index within the full 64-bit value
5283 __ jmpb(not_zero);
5284 __ bind(msw_is_zero);
5285 __ bsrl(Rdst, Rsrc);
5286 __ jccb(Assembler::notZero, not_zero);
5287 __ movl(Rdst, -1); // whole long is zero: 63-(-1) = 64
5288 __ bind(not_zero);
5289 __ negl(Rdst);
5290 __ addl(Rdst, BitsPerLong - 1); // dst = 63 - bit index of highest set bit
5291 %}
5292 ins_pipe(ialu_reg);
5293 %}
5292
5293 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5294 predicate(UseCountTrailingZerosInstruction);
5295 match(Set dst (CountTrailingZerosI src));
5296 effect(KILL cr);
5297
5298 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
5299 ins_encode %{
5300 __ tzcntl($dst$$Register, $src$$Register);
5301 %}
5302 ins_pipe(ialu_reg);
5303 %}
5304
5305 // Fallback when TZCNT is unavailable: BSF gives the trailing-zero count directly,
5306 // but leaves dst undefined for src == 0, where the answer must be 32.
5307 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5308 predicate(!UseCountTrailingZerosInstruction);
5309 match(Set dst (CountTrailingZerosI src));
5310 effect(KILL cr);
5311
5312 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
5313 "JNZ done\n\t"
5314 "MOV $dst, 32\n"
5315 "done:" %}
5316 ins_encode %{
5317 Register Rdst = $dst$$Register;
5318 Label done;
5319 __ bsfl(Rdst, $src$$Register);
5320 __ jccb(Assembler::notZero, done);
5321 __ movl(Rdst, BitsPerInt); // src == 0 => 32
5322 __ bind(done);
5323 %}
5324 ins_pipe(ialu_reg);
5325 %}
5326
5327 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5328 predicate(UseCountTrailingZerosInstruction);
5329 match(Set dst (CountTrailingZerosL src));
5330 effect(TEMP dst, KILL cr);
5331
5332 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
5333 "JNC done\n\t"
5334 "TZCNT $dst, $src.hi\n\t"
5335 "ADD $dst, 32\n"
5336 "done:" %}
5337 ins_encode %{
5338 Register Rdst = $dst$$Register;
5339 Register Rsrc = $src$$Register;
5340 Label done;
5341 __ tzcntl(Rdst, Rsrc);
5342 __ jccb(Assembler::carryClear, done); // TZCNT sets CF iff its source was zero
5343 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); // low word zero: count in high word, plus 32
5344 __ addl(Rdst, BitsPerInt);
5345 __ bind(done);
5346 %}
5347 ins_pipe(ialu_reg);
5348 %}
5349
5350 // Fallback when TZCNT is unavailable; yields 64 when both halves are zero
5351 // (32 is loaded, then the shared ADD 32 below runs).
5352 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5353 predicate(!UseCountTrailingZerosInstruction);
5354 match(Set dst (CountTrailingZerosL src));
5355 effect(TEMP dst, KILL cr);
5356
5357 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
5358 "JNZ done\n\t"
5359 "BSF $dst, $src.hi\n\t"
5360 "JNZ msw_not_zero\n\t"
5361 "MOV $dst, 32\n"
5362 "msw_not_zero:\n\t"
5363 "ADD $dst, 32\n"
5364 "done:" %}
5365 ins_encode %{
5366 Register Rdst = $dst$$Register;
5367 Register Rsrc = $src$$Register;
5368 Label msw_not_zero;
5369 Label done;
5370 __ bsfl(Rdst, Rsrc);
5371 __ jccb(Assembler::notZero, done);
5372 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5373 __ jccb(Assembler::notZero, msw_not_zero);
5374 __ movl(Rdst, BitsPerInt); // both halves zero: 32 + 32 = 64
5375 __ bind(msw_not_zero);
5376 __ addl(Rdst, BitsPerInt);
5377 __ bind(done);
5378 %}
5379 ins_pipe(ialu_reg);
5380 %}
5377
5378
5379 //---------- Population Count Instructions -------------------------------------
5380
5381 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5382 predicate(UsePopCountInstruction);
5383 match(Set dst (PopCountI src));
5384 effect(KILL cr);
5385
5386 format %{ "POPCNT $dst, $src" %}
5387 ins_encode %{
5388 __ popcntl($dst$$Register, $src$$Register);
5389 %}
5390 ins_pipe(ialu_reg);
5391 %}
5392
5393 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5394 predicate(UsePopCountInstruction);
5395 match(Set dst (PopCountI (LoadI mem)));
5396 effect(KILL cr);
5397
5398 format %{ "POPCNT $dst, $mem" %}
5399 ins_encode %{
5400 __ popcntl($dst$$Register, $mem$$Address);
5401 %}
5402 ins_pipe(ialu_reg);
5403 %}
5404
5405 // Note: Long.bitCount(long) returns an int.
5406 // 32-bit: sum the popcounts of the two halves.
5407 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5408 predicate(UsePopCountInstruction);
5409 match(Set dst (PopCountL src));
5410 effect(KILL cr, TEMP tmp, TEMP dst);
5411
5412 format %{ "POPCNT $dst, $src.lo\n\t"
5413 "POPCNT $tmp, $src.hi\n\t"
5414 "ADD $dst, $tmp" %}
5415 ins_encode %{
5416 __ popcntl($dst$$Register, $src$$Register);
5417 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5418 __ addl($dst$$Register, $tmp$$Register);
5419 %}
5420 ins_pipe(ialu_reg);
5421 %}
5422
5423 // Note: Long.bitCount(long) returns an int.
5424 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5425 predicate(UsePopCountInstruction);
5426 match(Set dst (PopCountL (LoadL mem)));
5427 effect(KILL cr, TEMP tmp, TEMP dst);
5428
5429 format %{ "POPCNT $dst, $mem\n\t"
5430 "POPCNT $tmp, $mem+4\n\t"
5431 "ADD $dst, $tmp" %}
5432 ins_encode %{
5433 //__ popcntl($dst$$Register, $mem$$Address$$first);
5434 //__ popcntl($tmp$$Register, $mem$$Address$$second);
5435 // Build the two word addresses by hand: low word at disp, high word at disp+4.
5436 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5437 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5438 __ addl($dst$$Register, $tmp$$Register);
5439 %}
5440 ins_pipe(ialu_reg);
5441 %}
5440
5441
5442 //----------Load/Store/Move Instructions---------------------------------------
5443 //----------Load Instructions--------------------------------------------------
5444 // Load Byte (8bit signed)
5445 instruct loadB(xRegI dst, memory mem) %{
5446 match(Set dst (LoadB mem));
5447
5448 ins_cost(125);
5449 format %{ "MOVSX8 $dst,$mem\t# byte" %}
5450
5451 ins_encode %{
5452 __ movsbl($dst$$Register, $mem$$Address);
5453 %}
5454
5455 ins_pipe(ialu_reg_mem);
5456 %}
5457
5458 // Load Byte (8bit signed) into Long Register
5459 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5460 match(Set dst (ConvI2L (LoadB mem)));
5461 effect(KILL cr);
5462
5463 ins_cost(375);
5464 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5465 "MOV $dst.hi,$dst.lo\n\t"
5466 "SAR $dst.hi,7" %}
5467
5468 ins_encode %{
5469 __ movsbl($dst$$Register, $mem$$Address);
5470 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5471 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
5472 %}
5473
5474 ins_pipe(ialu_reg_mem);
5475 %}
5476
5477 // Load Unsigned Byte (8bit UNsigned)
5478 instruct loadUB(xRegI dst, memory mem) %{
5479 match(Set dst (LoadUB mem));
5480
5481 ins_cost(125);
5482 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5483
5484 ins_encode %{
5485 __ movzbl($dst$$Register, $mem$$Address);
5486 %}
5487
5488 ins_pipe(ialu_reg_mem);
5489 %}
5490
5491 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5492 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5493 match(Set dst (ConvI2L (LoadUB mem)));
5494 effect(KILL cr);
5495
5496 ins_cost(250);
5497 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5498 "XOR $dst.hi,$dst.hi" %}
5499
5500 ins_encode %{
5501 Register Rdst = $dst$$Register;
5502 __ movzbl(Rdst, $mem$$Address);
5503 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); // high word of an unsigned value is always zero
5504 %}
5505
5506 ins_pipe(ialu_reg_mem);
5507 %}
5508
5509 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5510 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5511 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5512 effect(KILL cr);
5513
5514 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5515 "XOR $dst.hi,$dst.hi\n\t"
5516 "AND $dst.lo,right_n_bits($mask, 8)" %}
5517 ins_encode %{
5518 Register Rdst = $dst$$Register;
5519 __ movzbl(Rdst, $mem$$Address);
5520 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5521 __ andl(Rdst, $mask$$constant & right_n_bits(8)); // movzbl already cleared bits 8..31, so only 8 mask bits matter
5522 %}
5523 ins_pipe(ialu_reg_mem);
5524 %}
5525
5526 // Load Short (16bit signed)
5527 instruct loadS(rRegI dst, memory mem) %{
5528 match(Set dst (LoadS mem));
5529
5530 ins_cost(125);
5531 format %{ "MOVSX $dst,$mem\t# short" %}
5532
5533 ins_encode %{
5534 __ movswl($dst$$Register, $mem$$Address);
5535 %}
5536
5537 ins_pipe(ialu_reg_mem);
5538 %}
5539
5540 // Load Short (16 bit signed) to Byte (8 bit signed)
5541 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5542 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5543
5544 ins_cost(125);
5545 format %{ "MOVSX $dst, $mem\t# short -> byte" %}
5546 ins_encode %{
5547 // (x << 24) >> 24 of the low byte == sign-extended byte load
5548 __ movsbl($dst$$Register, $mem$$Address);
5549 %}
5550 ins_pipe(ialu_reg_mem);
5551 %}
5552
5553 // Load Short (16bit signed) into Long Register
5554 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5555 match(Set dst (ConvI2L (LoadS mem)));
5556 effect(KILL cr);
5557
5558 ins_cost(375);
5559 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
5560 "MOV $dst.hi,$dst.lo\n\t"
5561 "SAR $dst.hi,15" %}
5562
5563 ins_encode %{
5564 __ movswl($dst$$Register, $mem$$Address);
5565 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5566 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
5567 %}
5568
5569 ins_pipe(ialu_reg_mem);
5570 %}
5571
5572 // Load Unsigned Short/Char (16bit unsigned)
5573 instruct loadUS(rRegI dst, memory mem) %{
5574 match(Set dst (LoadUS mem));
5575
5576 ins_cost(125);
5577 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}
5578
5579 ins_encode %{
5580 __ movzwl($dst$$Register, $mem$$Address);
5581 %}
5582
5583 ins_pipe(ialu_reg_mem);
5584 %}
5585
5586 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5587 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5588 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5589
5590 ins_cost(125);
5591 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
5592 ins_encode %{
5593 // (x << 24) >> 24 keeps only the low byte, sign-extended
5594 __ movsbl($dst$$Register, $mem$$Address);
5595 %}
5596 ins_pipe(ialu_reg_mem);
5597 %}
5598
5599 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5600 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5601 match(Set dst (ConvI2L (LoadUS mem)));
5602 effect(KILL cr);
5603
5604 ins_cost(250);
5605 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
5606 "XOR $dst.hi,$dst.hi" %}
5607
5608 ins_encode %{
5609 __ movzwl($dst$$Register, $mem$$Address);
5610 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5611 %}
5612
5613 ins_pipe(ialu_reg_mem);
5614 %}
5615
5616 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5617 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5618 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5619 effect(KILL cr);
5620
5621 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5622 "XOR $dst.hi,$dst.hi" %}
5623 ins_encode %{
5624 Register Rdst = $dst$$Register;
5625 __ movzbl(Rdst, $mem$$Address); // mask is 0xFF: a zero-extended byte load is equivalent (little-endian low byte)
5626 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5627 %}
5628 ins_pipe(ialu_reg_mem);
5629 %}
5630
5631 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5632 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5633 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5634 effect(KILL cr);
5635
5636 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5637 "XOR $dst.hi,$dst.hi\n\t"
5638 "AND $dst.lo,right_n_bits($mask, 16)" %}
5639 ins_encode %{
5640 Register Rdst = $dst$$Register;
5641 __ movzwl(Rdst, $mem$$Address);
5642 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5643 __ andl(Rdst, $mask$$constant & right_n_bits(16)); // movzwl already cleared bits 16..31, so only 16 mask bits matter
5644 %}
5645 ins_pipe(ialu_reg_mem);
5646 %}
5645
5646 // Load Integer
5647 instruct loadI(rRegI dst, memory mem) %{
5648 match(Set dst (LoadI mem));
5649
5650 ins_cost(125);
5651 format %{ "MOV $dst,$mem\t# int" %}
5652
5653 ins_encode %{
5654 __ movl($dst$$Register, $mem$$Address);
5655 %}
5656
5657 ins_pipe(ialu_reg_mem);
5658 %}
5659
5660 // Load Integer (32 bit signed) to Byte (8 bit signed)
5661 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5662 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5663
5664 ins_cost(125);
5665 format %{ "MOVSX $dst, $mem\t# int -> byte" %}
5666 ins_encode %{
5667 // (x << 24) >> 24 == sign-extended low byte (little-endian)
5668 __ movsbl($dst$$Register, $mem$$Address);
5669 %}
5670 ins_pipe(ialu_reg_mem);
5671 %}
5672
5673 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5674 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5675 match(Set dst (AndI (LoadI mem) mask));
5676
5677 ins_cost(125);
5678 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
5679 ins_encode %{
5680 __ movzbl($dst$$Register, $mem$$Address);
5681 %}
5682 ins_pipe(ialu_reg_mem);
5683 %}
5684
5685 // Load Integer (32 bit signed) to Short (16 bit signed)
5686 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5687 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5688
5689 ins_cost(125);
5690 format %{ "MOVSX $dst, $mem\t# int -> short" %}
5691 ins_encode %{
5692 // (x << 16) >> 16 == sign-extended low half-word
5693 __ movswl($dst$$Register, $mem$$Address);
5694 %}
5695 ins_pipe(ialu_reg_mem);
5696 %}
5697
5698 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5699 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5700 match(Set dst (AndI (LoadI mem) mask));
5701
5702 ins_cost(125);
5703 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
5704 ins_encode %{
5705 __ movzwl($dst$$Register, $mem$$Address);
5706 %}
5707 ins_pipe(ialu_reg_mem);
5708 %}
5709
5710 // Load Integer into Long Register
5711 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5712 match(Set dst (ConvI2L (LoadI mem)));
5713 effect(KILL cr);
5714
5715 ins_cost(375);
5716 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
5717 "MOV $dst.hi,$dst.lo\n\t"
5718 "SAR $dst.hi,31" %}
5719
5720 ins_encode %{
5721 __ movl($dst$$Register, $mem$$Address);
5722 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5723 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // high word becomes the sign extension
5724 %}
5725
5726 ins_pipe(ialu_reg_mem);
5727 %}
5728
5729 // Load Integer with mask 0xFF into Long Register
5730 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5731 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5732 effect(KILL cr);
5733
5734 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5735 "XOR $dst.hi,$dst.hi" %}
5736 ins_encode %{
5737 Register Rdst = $dst$$Register;
5738 __ movzbl(Rdst, $mem$$Address); // mask 0xFF: zero-extended byte load of the low byte suffices
5739 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5740 %}
5741 ins_pipe(ialu_reg_mem);
5742 %}
5743
5744 // Load Integer with mask 0xFFFF into Long Register
5745 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5746 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5747 effect(KILL cr);
5748
5749 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5750 "XOR $dst.hi,$dst.hi" %}
5751 ins_encode %{
5752 Register Rdst = $dst$$Register;
5753 __ movzwl(Rdst, $mem$$Address); // mask 0xFFFF: zero-extended half-word load suffices
5754 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5755 %}
5756 ins_pipe(ialu_reg_mem);
5757 %}
5758
5759 // Load Integer with 31-bit mask into Long Register
5760 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5761 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5762 effect(KILL cr);
5763
5764 format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5765 "XOR $dst.hi,$dst.hi\n\t"
5766 "AND $dst.lo,$mask" %}
5767 ins_encode %{
5768 Register Rdst = $dst$$Register;
5769 __ movl(Rdst, $mem$$Address);
5770 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); // mask bit 31 is clear, so the result is non-negative: high word is zero
5771 __ andl(Rdst, $mask$$constant);
5772 %}
5773 ins_pipe(ialu_reg_mem);
5774 %}
5775
5776 // Load Unsigned Integer into Long Register
5777 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5778 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5779 effect(KILL cr);
5780
5781 ins_cost(250);
5782 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
5783 "XOR $dst.hi,$dst.hi" %}
5784
5785 ins_encode %{
5786 __ movl($dst$$Register, $mem$$Address);
5787 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5788 %}
5789
5790 ins_pipe(ialu_reg_mem);
5791 %}
5790
5791 // Load Long. Cannot clobber address while loading, so restrict address
5792 // register to ESI
5793 instruct loadL(eRegL dst, load_long_memory mem) %{
5794 predicate(!((LoadLNode*)n)->require_atomic_access());
5795 match(Set dst (LoadL mem));
5796
5797 ins_cost(250);
5798 format %{ "MOV $dst.lo,$mem\t# long\n\t"
5799 "MOV $dst.hi,$mem+4" %}
5800
5801 ins_encode %{
5802 // Two separate 32-bit loads: NOT atomic (guarded by the predicate above).
5803 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5804 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5805 __ movl($dst$$Register, Amemlo);
5806 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5807 %}
5808
5809 ins_pipe(ialu_reg_long_mem);
5810 %}
5811
5812 // Volatile Load Long. Must be atomic, so do 64-bit FILD
5813 // then store it down to the stack and reload on the int
5814 // side.
5815 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5816 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5817 match(Set dst (LoadL mem));
5818
5819 ins_cost(200);
5820 format %{ "FILD $mem\t# Atomic volatile long load\n\t"
5821 "FISTp $dst" %}
5822 ins_encode(enc_loadL_volatile(mem,dst));
5823 ins_pipe( fpu_reg_mem );
5824 %}
5825
5826 // Atomic volatile long load via a 64-bit SSE move, landing in a stack slot.
5827 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5828 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5829 match(Set dst (LoadL mem));
5830 effect(TEMP tmp);
5831 ins_cost(180);
5832 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
5833 "MOVSD $dst,$tmp" %}
5834 ins_encode %{
5835 __ movdbl($tmp$$XMMRegister, $mem$$Address);
5836 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5837 %}
5838 ins_pipe( pipe_slow );
5839 %}
5840
5841 // Atomic volatile long load via SSE, split into a GPR pair.
5842 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5843 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5844 match(Set dst (LoadL mem));
5845 effect(TEMP tmp);
5846 ins_cost(160);
5847 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
5848 "MOVD $dst.lo,$tmp\n\t"
5849 "PSRLQ $tmp,32\n\t"
5850 "MOVD $dst.hi,$tmp" %}
5851 ins_encode %{
5852 __ movdbl($tmp$$XMMRegister, $mem$$Address);
5853 __ movdl($dst$$Register, $tmp$$XMMRegister); // low 32 bits
5854 __ psrlq($tmp$$XMMRegister, 32); // shift high half down
5855 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); // high 32 bits
5856 %}
5857 ins_pipe( pipe_slow );
5858 %}
5856
// Load Range
// Array length / range load: plain 32-bit MOV (opcode 0x8B /r).
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
// On 32-bit x86 a pointer load is the same 32-bit MOV as an int load.
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
// Klass (metadata) pointer load; identical encoding to loadP.
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}
5890
// Load Double
// x87 double load (UseSSE<=1): FLD m64real pushes onto the FPU stack,
// then FSTP pops into the allocated FPU register.
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
// Preferred SSE2 form when MOVSD's zeroing of the upper half is cheap
// on this CPU (UseXmmLoadAndClearUpper).
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 double load for CPUs where a partial-register load (MOVLPD) is
// preferred; MacroAssembler::movdbl selects the actual instruction
// based on UseXmmLoadAndClearUpper, matching the format shown.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float
// x87 float load (UseSSE==0): FLD m32real then pop into the target
// FPU register.
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
5954
// Load Effective Address
// The following five rules match the address-expression operand forms
// directly (match(Set dst mem)) and materialize the address with LEA
// (opcode 0x8D). One rule per addressing mode keeps encodings minimal.

// base + 8-bit displacement
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// base + 32-bit displacement
instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// base + index + displacement
instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// base + scaled index
instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// base + scaled index + displacement
instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}
6005
// Load Constant
// Generic 32-bit immediate into register (MOV r32, imm32).
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// XOR reg,reg is shorter than MOV reg,0 but clobbers EFLAGS, hence
// the KILL cr effect.
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33); /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

// Pointer constant (MOV r32, imm32; relocation handled by LdImmP).
instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8); /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// 64-bit constant: two MOV-immediates, one per 32-bit half.
// KILL cr is conservative; see loadConL0 below which really uses XOR.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Long zero: XOR both halves (clobbers EFLAGS).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}
6057
// The instruction usage is guarded by predicate in operand immFPR().
// x87 float constant: load from the per-method constant table, then
// pop into the allocated FPU register.
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    // fstp_d here is the register-form FSTP ST(i); FPU registers hold
    // full-width values regardless of the loaded precision.
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
// Constant 0.0f: FLDZ avoids a constant-table access entirely.
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
// Constant 1.0f: FLD1 avoids a constant-table access entirely.
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}
6096
// The instruction usage is guarded by predicate in operand immF().
// SSE float constant: MOVSS from the constant table.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// Float 0.0: XORPS reg,reg — no memory access, cheapest form.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
6118
// The instruction usage is guarded by predicate in operand immDPR().
// x87 double constant from the constant table (FLD m64real + FSTP).
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
// Constant 0.0 via FLDZ — no constant-table access.
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
// Constant 1.0 via FLD1 — no constant-table access.
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// SSE2 double constant: MOVSD from the constant table.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
// Double 0.0: XORPD reg,reg — no memory access, cheapest form.
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
6182
// Load Stack Slot
// Spill-slot reloads: same encodings as the memory loads above, but the
// source operand is a stack slot rather than a general memory operand.
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Long spill-slot reload: two 32-bit MOVs, low then high half.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
// Pointer spill-slot reload.
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
// Float spill-slot reload into an x87 register (FLD m32real + FSTP).
// RMopc_Mem_no_oop: stack slots never contain oops needing relocation.
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
// Double spill-slot reload into an x87 register (FLD m64real + FSTP).
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
6241
// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// One rule per AllocatePrefetchInstr flag value; the predicates are
// mutually exclusive so exactly one rule matches a given configuration.

// No SSE and not PREFETCHW: nothing usable, emit no code.
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// AllocatePrefetchInstr==3: PREFETCHW (prefetch with intent to write).
instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// AllocatePrefetchInstr==0: PREFETCHNTA (non-temporal hint).
instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// AllocatePrefetchInstr==1: PREFETCHT0.
instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// AllocatePrefetchInstr==2: PREFETCHT2.
instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
6302
6303 //----------Store Instructions-------------------------------------------------
6304
// Store Byte
// xRegI restricts src to registers with byte-addressable encodings
// (EAX/EBX/ECX/EDX) as required by the 8-bit MOV (0x88 /r).
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// 16-bit store: operand-size prefix 0x66 followed by MOV (0x89 /r).
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
// Non-atomic long store: two 32-bit MOVs. The atomic variants below
// handle require_atomic_access() (volatile) stores.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// StoreI of a ConvL2I folds to a single store of the long's low half.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
6361
// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
// The CMP against EAX is the probe; it faults at the CMP (where the
// implicit-null-check machinery expects it) rather than mid-FISTP.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );  // the probing CMP clobbers EFLAGS
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 atomic volatile long store from a stack slot: probe, load the
// 64 bits into an XMM temp, then one MOVSD performs the atomic store.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);                          // address probe
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);          // atomic 64-bit store
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 atomic volatile long store from a register pair: assemble the
// two 32-bit halves in XMM temps with MOVD + PUNPCKLDQ, then store
// atomically with one MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);                          // address probe
    __ movdl($tmp$$XMMRegister, $src$$Register);          // low half
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));  // high half
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);  // combine into 64 bits
    __ movdbl($mem$$Address, $tmp$$XMMRegister);          // atomic 64-bit store
  %}
  ins_pipe( pipe_slow );
%}
6414
// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
// MOV m32, imm32 (C7 /0).
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
// Guarded by UseStoreImmI16: 16-bit immediate stores cause
// length-changing-prefix stalls on some CPUs, so this is opt-in.
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
// Byte store used by GC card-marking barriers (StoreCM node);
// encoding is identical to storeImmB.
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}
6482
// Store Double
// x87 store: src must be at FPU top-of-stack (regDPR1); FST m64real.
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
// The 64-bit memory store itself rounds from 80-bit extended
// precision, so RoundDouble folds into the plain store.
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
6532
6533
6534 // Store Float
6535 instruct storeFPR( memory mem, regFPR1 src) %{
6536 predicate(UseSSE==0);
6537 match(Set mem (StoreF mem src));
6538
6539 ins_cost(100);
6540 format %{ "FST_S $mem,$src" %}
6541 opcode(0xD9); /* D9 /2 */
6542 ins_encode( enc_FPR_store(mem,src) );
6543 ins_pipe( fpu_mem_reg );
6544 %}
6545
6546 // Store Float does rounding on x86
6547 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6548 predicate(UseSSE==0);
6549 match(Set mem (StoreF mem (RoundFloat src)));
6550
6551 ins_cost(100);
6552 format %{ "FST_S $mem,$src\t# round" %}
6553 opcode(0xD9); /* D9 /2 */
6554 ins_encode( enc_FPR_store(mem,src) );
6555 ins_pipe( fpu_mem_reg );
6556 %}
6557
6558 // Store Float does rounding on x86
6559 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6560 predicate(UseSSE<=1);
6561 match(Set mem (StoreF mem (ConvD2F src)));
6562
6563 ins_cost(100);
6564 format %{ "FST_S $mem,$src\t# D-round" %}
6565 opcode(0xD9); /* D9 /2 */
6566 ins_encode( enc_FPR_store(mem,src) );
6567 ins_pipe( fpu_mem_reg );
6568 %}
6569
6570 // Store immediate Float value (it is faster than store from FPU register)
6571 // The instruction usage is guarded by predicate in operand immFPR().
6572 instruct storeFPR_imm( memory mem, immFPR src) %{
6573 match(Set mem (StoreF mem src));
6574
6575 ins_cost(50);
6576 format %{ "MOV $mem,$src\t# store float" %}
6577 opcode(0xC7); /* C7 /0 */
6578 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
6579 ins_pipe( ialu_mem_imm );
6580 %}
6581
6582 // Store immediate Float value (it is faster than store from XMM register)
6583 // The instruction usage is guarded by predicate in operand immF().
6584 instruct storeF_imm( memory mem, immF src) %{
6585 match(Set mem (StoreF mem src));
6586
6587 ins_cost(50);
6588 format %{ "MOV $mem,$src\t# store float" %}
6589 opcode(0xC7); /* C7 /0 */
6590 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
6591 ins_pipe( ialu_mem_imm );
6592 %}
6593
// Store Integer to stack slot
// Spill stores: register -> stack slot, plain MOV (0x89 /r).
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
// Pointer spill store; identical encoding to storeSSI.
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
// Long spill store: two 32-bit MOVs, low then high half.
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}
6627
//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors
// Most barriers emit no code (size(0)): they exist as scheduling
// constraints for the compiler. Only membar_volatile emits an
// instruction (a locked ADD that acts as a StoreLoad fence).

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full StoreLoad fence: emitted as LOCK ADDL [ESP], 0 (see
// MacroAssembler::membar), which clobbers EFLAGS — hence KILL cr.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// A MemBarVolatile immediately preceded by a suitable barrier-carrying
// instruction (detected by post_store_load_barrier) needs no code.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
6709
//----------Move Instructions--------------------------------------------------
// CastX2P: reinterpret an int as a pointer. Both operands are pinned
// to EAX (eAXRegP/eAXRegI), so the "move" is register-identity and the
// encoding is empty.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

// CastP2X: reinterpret a pointer as an int; plain register copy.
instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}
6726
//----------Conditional Move---------------------------------------------------
// Conditional move
// jmov* rules emulate CMOV with an inverted short branch over a MOV,
// for CPUs without the CMOV instruction (!supports_cmov()).
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned-comparison variant of the branch-based cmove emulation.
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// True CMOVcc (0F 40+cc) when the CPU supports it: signed conditions.
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// CMOVcc for unsigned conditions.
instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Carry-flag-only unsigned conditions: expand into cmovI_regU.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}
6791
// Conditional move
// CMOVcc with a memory source: folds the LoadI into the cmove.
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
// Unsigned-condition variant of the memory-source cmove.
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Carry-flag-only unsigned conditions: expand into cmovI_memU.
instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
6822
// Conditional move
// Pointer cmove via CMOVcc; same encoding as the integer forms.
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
// Unsigned-condition pointer cmove.
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Carry-flag-only unsigned conditions: expand into cmovP_regU.
instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
6868
6869 // DISABLED: Requires the ADLC to emit a bottom_type call that
6870 // correctly meets the two pointer arguments; one is an incoming
6871 // register but the other is a memory operand. ALSO appears to
6872 // be buggy with implicit null checks.
6873 //
6874 //// Conditional move
6875 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6876 // predicate(VM_Version::supports_cmov() );
6877 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6878 // ins_cost(250);
6879 // format %{ "CMOV$cop $dst,$src\t# ptr" %}
6880 // opcode(0x0F,0x40);
6881 // ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6882 // ins_pipe( pipe_cmov_mem );
6883 //%}
6884 //
6885 //// Conditional move
6886 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6887 // predicate(VM_Version::supports_cmov() );
6888 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6889 // ins_cost(250);
6890 // format %{ "CMOV$cop $dst,$src\t# ptr" %}
6891 // opcode(0x0F,0x40);
6892 // ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6893 // ins_pipe( pipe_cmov_mem );
6894 //%}
6895
// Conditional move
// x87 FCMOVcc on the FPU top-of-stack; the unsigned flag variants map
// directly onto the FCMOV condition set.
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
// Float variant; identical encoding since FPU registers are full-width.
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Emulate with an inverted branch over a register-to-register FPU move.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}
6943
// No CMOVE with SSE/SSE2
// CMoveF with the value in an XMM register: there is no XMM conditional
// move, so emit a short branch around a MOVSS.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}
6961
// No CMOVE with SSE/SSE2
// CMoveD with the value in an XMM register: there is no XMM conditional
// move, so emit a short branch around a MOVSD.
// Fix: the format comment said "# float" although this instruct moves a
// double (MOVSD / movdbl) — corrected to "# double" so the optoassembly
// listing is not misleading. Debug-format text only; no encoding change.
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}
6979
// unsigned version
// Same branch-around-MOVSS trick as fcmovF_regS, but for an unsigned
// condition code (eFlagsRegU).
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// Carry-flag-only unsigned compare variant: delegates to fcmovF_regU via
// an expand rule (no code of its own).
instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}
7006
// unsigned version
// Same branch-around-MOVSD trick as fcmovD_regS, but for an unsigned
// condition code (eFlagsRegU).
// Fix: the format comment said "# float" although this instruct moves a
// double (MOVSD / movdbl) — corrected to "# double". Debug-format text
// only; no encoding change.
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}
7024
// Carry-flag-only unsigned compare variant: delegates to fcmovD_regU via
// an expand rule (no code of its own).
instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}
7033
// Conditional move of a long: on 32-bit x86 a long lives in a register
// pair, so emit two CMOVcc instructions (low half, then high half) on the
// same condition. Requires hardware CMOV support.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as cmovL_reg, for an unsigned condition code.
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Carry-flag-only unsigned compare variant: delegates to cmovL_regU.
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
7064
//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
// reg += reg.  ADD r32, r/m32 (0x03, reg-reg form).  Flags clobbered.
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// reg += imm.  OpcSErm/Con8or32 pick the 8-bit sign-extended immediate
// form when the constant fits, else the full 32-bit form.
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// reg += 1 via the one-byte INC r32 (0x40+reg); gated on UseIncDec since
// INC does not update the carry flag the way ADD does.
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: dst = src0 + imm with no flag clobber
// (note: no eFlagsReg in the signature).
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer flavor of the LEA add above.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// reg += -1 via the one-byte DEC r32 (0x48+reg); gated on UseIncDec.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Pointer += int, plain ADD (pointers are 32-bit here).
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer += imm.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// reg += [mem]  (load folded into the ADD).
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// [mem] += reg  (read-modify-write; ADD r/m32, r32 form 0x01).
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
// [mem] += imm  (read-modify-write).
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// [mem] += 1 via INC m32 (FF /0) — shorter than ADD with immediate 1.
instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// [mem] += -1 via DEC m32 (FF /1).
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}
7211
7212
// CheckCastPP: a compiler-only type-narrowing node. Emits no machine code
// (size(0), empty encoding); it exists so the matcher has a place for it.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastPP: likewise a zero-code ideal-graph marker.
instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastII: zero-code, zero-cost marker for an int whose type was narrowed.
instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// CastLL: long flavor of the zero-code cast marker.
instruct castLL( eRegL dst ) %{
  match(Set dst (CastLL dst));
  format %{ "#castLL of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// CastFF for an XMM float (SSE on).
instruct castFF( regF dst ) %{
  predicate(UseSSE >= 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// CastDD for an XMM double (SSE2 on).
instruct castDD( regD dst ) %{
  predicate(UseSSE >= 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// CastFF for an x87 float register (SSE off).
instruct castFF_PR( regFPR dst ) %{
  predicate(UseSSE < 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// CastDD for an x87 double register (SSE2 off).
instruct castDD_PR( regDPR dst ) %{
  predicate(UseSSE < 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}
7280
// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS via CMPXCHG8B. Register constraints are baked into the operand
// types: mem_ptr in ESI, oldval in EDX:EAX, newval in EBX:ECX (the fixed
// register assignments CMPXCHG8B requires). oldval is KILLed because the
// instruction writes the current memory value back into EDX:EAX on failure.
// res is set from ZF by enc_flags_ne_to_boolean.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS via CMPXCHG (oldval fixed in EAX, newval in ECX).
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte CAS via CMPXCHGB.
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Short (16-bit) CAS via CMPXCHGW.
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int CAS via CMPXCHG.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}
7351
// CompareAndExchange variants: like CAS, but the RESULT is the value found
// in memory (CMPXCHG leaves it in the oldval registers), not a boolean —
// hence Set oldval and no flags-to-boolean step.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer compare-and-exchange (result in EAX).
instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte compare-and-exchange.
instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Short compare-and-exchange.
instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Int compare-and-exchange.
instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}
7392
// GetAndAddB when the fetched value is unused: a locked ADDB is cheaper
// than XADD (no register round-trip).
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
// Atomic fetch-and-add of a byte; XADD returns the old value in newval.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddS, result unused: locked 16-bit ADD.
instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDS [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add of a short.
instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDS [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddI, result unused: locked 32-bit ADD.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add of an int.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
// Atomic byte swap; XCHG with a memory operand is implicitly locked, so no
// explicit lock() and no flags clobber.
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic short swap.
instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic int swap.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic pointer swap (32-bit, so same XCHGL as int).
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
7499
//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
// reg -= reg.  SUB r32, r/m32 (0x2B).
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// reg -= imm (8-bit sign-extended form used when the constant fits).
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// reg -= [mem]  (load folded into the SUB).
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// [mem] -= reg  (read-modify-write; SUB r/m32, r32 form 0x29).
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
// Matches ptr + (0 - x) so it can be emitted as a single SUB.
instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Integer negation: 0 - dst becomes NEG dst (F7 /3).
instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}
7569
//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
// reg *= reg via two-byte IMUL r32, r/m32 (0F AF).
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
// Three-operand form: dst = src * imm (IMUL r32, r/m32, imm).
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low half of the EDX:EAX pair only —
// feeds the multiply-high patterns below, which ignore the high half.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
// The predicate walks the ideal subtree (_kids) to check that the MulL's
// right operand is a constant that fits in 32 bits, so the whole
// (int * constL) >> 32 pattern can be done with one widening IMUL.
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// Same as mulI_imm_high but for shift counts 33..63: an extra SAR of EDX
// finishes the shift after taking the high word.
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
// dst = [mem] * imm, load folded into the three-operand IMUL.
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
// dst *= [mem].
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}
7664
// MulAddS2I: dst = dst*src1 + src2*src3, expressed purely as an expand
// into two IMULs and an ADD (src2 is clobbered by the second multiply,
// hence KILL src2).
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
            mulI_eReg(src2, src3, cr);
            addI_eReg(dst, src2, cr); %}
%}

// Multiply Register Int to Long
// Widening signed multiply: one-operand IMUL leaves the 64-bit product in
// EDX:EAX (hence the fixed eADXRegL dst / eAXRegI src operands).
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Widening *unsigned* multiply: matches (int & 0xffffffffL) * (int &
// 0xffffffffL) and uses the one-operand MUL into EDX:EAX.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}
7699
// Multiply Register Long
// Full 64x64->64 multiply from 32-bit pieces; dst is fixed in EDX:EAX.
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
// Drops the x_hi*y_lo term (it is zero), saving one IMUL.
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
// hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
// Symmetric to mulL_eReg_lhi0: the x_lo*y_hi term is zero.
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
// Both cross terms vanish: a single unsigned MUL suffices.
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
// hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
// immL_127 keeps the constant in IMUL's 8-bit immediate range.
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
// Basic idea: lo(result) = lo(src * EAX)
// hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}
7790
// Integer DIV with Register
// Signed divide: EAX / ECX -> quotient in EAX, remainder in EDX (killed).
// The cdq_enc prologue (see format) special-cases min_jint / -1, which
// would otherwise raise #DE overflow on IDIV.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
    "normal: CDQ\n\t"
            "IDIV $div\n\t"
    "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// No 64-bit divide on 32-bit x86: call into the runtime (SharedRuntime::ldiv).
instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (DivL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
// Multi-output match (no Set): quotient lands in EAX, remainder in EDX,
// both live after the instruction.
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
    "normal: CDQ\n\t"
            "IDIV $div\n\t"
    "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
// Remainder only: IDIV leaves it in EDX; EAX (the quotient) is killed.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// As with divL_eReg, 64-bit remainder goes through the runtime.
instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (ModL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}
7872
// Divide Register Long (no special case since divisor != -1)
// Long division by a compile-time 32-bit constant.  The divisor is asserted
// to be != 0, != -1 and != min_jint, so no #DE guard is required.  Strategy:
// divide the *unsigned magnitude* of the dividend by |imm| using one or two
// chained 32-bit DIVs (high word first, its remainder feeding the low-word
// divide), then negate the result if the signs require it.
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
    "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
    "fast:\n\t"
            "DIV $tmp\n"
    "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    // Divide by the magnitude; the sign of the quotient is fixed up at Ldone.
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    // If |divisor| > hi(dividend) the quotient fits in 32 bits: one DIV.
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    // Flags still reflect the CMP above; hi word <= |divisor| here, so
    // lessEqual distinguishes a non-negative dividend.
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    // A negative constant divisor flips the sign of the quotient.
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}
7943
// Remainder Register Long (remainder fit into 32 bits)
// Long remainder by a compile-time 32-bit constant.  Mirrors divL_eReg_imm32:
// divide the unsigned magnitude of the dividend by |imm| with chained 32-bit
// DIVs; since |remainder| < |imm| <= 2^31 the remainder always fits in 32
// bits and is sign-extended at the end.  Note the remainder takes the sign
// of the *dividend*, never of the divisor.
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
    "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
    "fast:\n\t"
            "DIV $tmp\n"
    "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    // Work on magnitudes; remainder sign is restored from the dividend below.
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    // If |divisor| > hi(dividend), dividend is positive and one DIV suffices.
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    // Remainder is in EDX (hi reg); move to lo word and sign-extend into hi.
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}
8010
// Integer Shift Instructions
//
// Three encodings per shift kind: the short D1 /r form for a shift count of
// one, the C1 /r ib form for an 8-bit immediate count, and the D3 /r form
// for a variable count (which the ISA requires to be in CL).  All shifts
// clobber EFLAGS, hence KILL cr on every rule.

// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4);  /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
// Shift count must be in CL (eCXRegI), as required by the D3 encoding.
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4);  /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
// Read-modify-write form: shifts the memory operand in place.
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  // NOTE(review): register form, but classed as ialu_mem_imm — presumably
  // intentional pipeline modeling; confirm before "fixing".
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7);  /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5);  /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}
8128
8129
// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
// (The ideal graph is (RShiftI (LShiftI src 24) 24); a single MOVSX byte
// sign-extension replaces both shifts.)
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
// A single MOVSX word sign-extension replaces the shift pair.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
// Shift count must be in CL, per the D3 encoding.
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}
8168
8169
//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
// OpcSErm picks the sign-extended 83 /4 ib form for 8-bit-representable
// immediates, else the full 81 /4 id form (via Con8or32).
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
// Read-modify-write: note opcode 0x21 (AND r/m,r direction) vs 0x23 above.
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
8233
// BMI1 instructions
// These match the ideal-graph idioms that the BMI1 extension computes in a
// single instruction (guarded by UseBMI1Instructions):
//   ANDN   dst = ~src1 & src2          matches (AndI (XorI src1 -1) src2)
//   BLSI   dst = -src & src            (isolate lowest set bit)
//   BLSMSK dst = (src-1) ^ src         (mask up to lowest set bit)
//   BLSR   dst = (src-1) & src         (clear lowest set bit)
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
8350
// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer reinterpreted as an integer (CastP2X is a
// bit-identity on 32-bit, so the plain OR encoding applies).
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
// OpcSErm selects the sign-extended 83 /1 ib form for small immediates.
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
// Read-modify-write: note opcode 0x09 (OR r/m,r direction) vs 0x0B above.
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
8424
// ROL/ROR
// There is no ideal rotate node; rotates are recognized from their
// shift-and-or idioms, e.g. (x << n) | (x >>> (32-n)).  The "expand"
// instructs below carry the actual encodings; the "match" instructs
// recognize the idiom and expand into them.
// ROL expand
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0);  /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0);  /*Opcode /C1  /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

// Variable rotate count must be in CL; dst is therefore ncxRegI (not ECX).
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0);  /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate checks the two shift counts sum to 0 mod 32, i.e. a true rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
// (0 - shift) and (32 - shift) are equivalent rotate idioms since the
// hardware masks the count to 5 bits.
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1);  /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Predicate checks the two shift counts sum to 0 mod 32, i.e. a true rotate.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}
8557
// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// x ^ -1 is bitwise NOT; NOT does not modify EFLAGS, so no KILL cr here.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
     __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
// OpcSErm selects the sign-extended 83 /6 ib form for small immediates.
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
// Read-modify-write: note opcode 0x31 (XOR r/m,r direction) vs 0x33 above.
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
8630
//----------Convert Int to Boolean---------------------------------------------
// Conv2B(x) produces 0 if x == 0, else 1.  Expanded as:
//   MOV dst,src ; NEG dst ; ADC dst,src
// NEG sets CF iff dst != 0, so ADC computes 0+0+0 = 0 for zero input and
// (-x)+x+1 = 1 for nonzero input.

instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer flavor of the same expansion (pointer != NULL test).
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}
8684
// CmpLTMask: dst = (p < q) ? -1 : 0, computed branch-free.
// SETlt materializes the signed-less-than condition as 0/1 in the low byte
// of the (pre-zeroed) destination; NEG turns 1 into the all-ones mask.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // Removed dead `Label done;` left over from an earlier branching
    // implementation; the current sequence is branch-free.
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}
8708
// CmpLTMask against zero: (dst < 0) ? -1 : 0 is simply an arithmetic
// shift that smears the sign bit across the whole register.
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
  __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// Fused form of p = (p < q) ? p - q + y : p - q: SUB sets SF/OF, so the
// conditional add of y can branch directly on the subtraction's flags.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// Fused form of y = (p < q) ? y : 0 — keep y when p < q, else zero it.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}
8768
8769 /* If I enable this, I encourage spilling in the inner loop of compress.
8770 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8771 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8772 */
//----------Overflow Math Instructions-----------------------------------------
// These set only the condition codes (cr) for Math.xxxExact intrinsics; the
// consumer branches on the OF flag.  Variants whose underlying instruction
// destroys its first operand mark it USE_KILL.

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtraction overflow can use CMP (same flags as SUB, no operand killed).
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 0 - x: NEG sets OF exactly when x == min_jint.
instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Three-operand IMUL writes a scratch register, so op1/op2 survive.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
8858
// Integer Absolute Instructions
// Branch-free abs: tmp = sign-mask of src (0 or -1); then
// (src ^ mask) - mask conditionally negates.  Note abs(min_jint)
// still yields min_jint (two's-complement wraparound).
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
          %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);
    __ subl($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
8880
8881 //----------Long Instructions------------------------------------------------
8882 // Add Long Register with Register
8883 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8884 match(Set dst (AddL dst src));
8885 effect(KILL cr);
8886 ins_cost(200);
8887 format %{ "ADD $dst.lo,$src.lo\n\t"
8888 "ADC $dst.hi,$src.hi" %}
8889 opcode(0x03, 0x13);
8890 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8891 ins_pipe( ialu_reg_reg_long );
8892 %}
8893
8894 // Add Long Register with Immediate
// Add Long Register with Immediate.
// 64-bit integer arithmetic on 32-bit x86 is split into an operation on the
// low 32-bit halves followed by the carry/borrow-propagating form on the
// high halves (ADD/ADC, SUB/SBB).
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  // The two-instruction sequence clobbers EFLAGS.
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  // $mem addresses the low 32 bits; the high word is 4 bytes above it.
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  // SUB the low halves, then SBB so the borrow propagates to the high halves.
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Long negation (matches 0 - dst); the in-place sequence is produced by the
// neg_long encoding class defined elsewhere in this file.
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}
8960
8961 // And Long Register with Register
// Bitwise AND is independent per 32-bit half, so the same AND opcode is used
// for both the low and the high word (no carry to propagate).
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
8994
8995 // BMI1 instructions
// BMI1 ANDN for longs: matches (~src1) & src2 expressed as (src1 ^ -1) & src2
// in the ideal graph. Done independently per 32-bit half.
// HIGH_FROM_LOW maps the allocated low-half register to its paired high-half
// register (see the register definitions at the top of this file).
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  // TEMP dst: dst must not be register-allocated on top of an input it would
  // clobber between the two half-operations.
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

// BMI1 ANDN with the second operand loaded from memory.
instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // Re-derive the address of the high word: same base/index/scale, disp+4.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}
9035
// BMI1 BLSI for longs: matches (0 - src) & src, i.e. isolate lowest set bit.
// The high half is first zeroed; BLSIL on the low word sets ZF per its
// result (see Intel SDM for BLSI flag behavior), and the JNZ skips the
// high-half BLSIL when the lowest set bit was already found in the low word.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Memory-operand form of the rule above; note the ideal-graph pattern
// requires BOTH uses of src to be the same LoadL node.
instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the high 32 bits: same addressing mode with disp+4.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}
9087
// BMI1 BLSMSK for longs: matches (src + -1) ^ src, i.e. mask up to and
// including the lowest set bit. CF from the low-half BLSMSKL (set when the
// low source word is zero — see Intel SDM) decides whether the mask extends
// into the high word; JNC skips the high-half step otherwise.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

// Memory-operand form; both uses of src must be the same LoadL node.
instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // High word of the in-memory long: same addressing mode, disp+4.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}
9143
// BMI1 BLSR for longs: matches (src + -1) & src, i.e. reset lowest set bit.
// Unlike BLSI/BLSMSK, the high half starts as a copy of the source's high
// word; it only needs its own BLSRL when the low word was zero (CF set by
// the low-half BLSRL — see Intel SDM), i.e. when the lowest set bit lives
// in the high word.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

// Memory-operand form; both uses of src must be the same LoadL node.
instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // High word of the in-memory long: same addressing mode, disp+4.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}
9198
9199 // Or Long Register with Register
// Bitwise OR/XOR on longs: independent per 32-bit half, no carry chain.
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// XOR with all-ones is a bitwise complement; NOT does not touch EFLAGS,
// hence no KILL cr effect here.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
9278
9279 // Shift Left Long by 1
// Shift Left Long by 1
// Small constant left shifts are implemented as ADD/ADC chains: x+x doubles
// the 64-bit value with ADC carrying the low word's top bit into the high
// word. Guarded by the UseNewLongLShift flag.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
// Two doublings: the ADD/ADC pair from the shift-by-1 rule, repeated.
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
// Three doublings; beyond this the SHLD-based rules below take over.
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
// SHLD shifts bits from the low word into the high word; SHL then shifts
// the low word itself.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
// A shift of >= 32 moves the low word wholesale into the high word, shifts
// it by cnt-32, and clears the low word.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}
9360
9361 // Shift Left Long by variable
// Shift Left Long by variable (count in CL, forced by the eCXRegI operand).
// x86 takes shift counts mod 32 (see Intel SDM), so bit 5 of the count is
// tested explicitly: for counts >= 32 the low word is first moved into the
// high word and the low word cleared; the SHLD/SHL pair then handles the
// remaining count mod 32.
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31 (logical/unsigned: URShiftL).
// SHRD shifts bits from the high word down into the low word; SHR then
// shifts the high word, filling with zeroes.
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63 (logical): move high word down, shift the rest,
// zero the high word.
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable (logical). Same >= 32 pre-step as the left
// shift above, but moving high to low and zeroing the high word.
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31 (arithmetic: RShiftL, sign-filling SAR on the
// high word).
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63 (arithmetic): SAR $dst.hi,31 replicates the
// sign bit across the high word instead of zeroing it.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable.
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9458
9459
9460 //----------Double Instructions------------------------------------------------
9461 // Double Math
9462
9463 // Compare & branch
9464
9465 // P6 version of float compare, sets condition codes in EFLAGS
// P6 version of float compare, sets condition codes in EFLAGS directly via
// FUCOMIP. The fixup path (cmpF_P6_fixup) forces CF on an unordered (NaN)
// result so NaN compares as "less"; it clobbers EAX, hence KILL rax.
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Variant for the carry-flag-only user (eFlagsRegUCF): no NaN fixup needed,
// so no EAX kill and a shorter sequence.
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
// Pre-P6 fallback: FCOM + FNSTSW AX + SAHF to move the x87 status flags
// into EFLAGS; unordered results are treated as "less than".
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1 (three-way CmpD3 result materialized in dst).
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
9543
9544 // float compare and set condition codes in EFLAGS by XMM regs
// SSE2 double compare setting EFLAGS via UCOMISD. On an unordered (NaN)
// result UCOMISD sets PF; emit_cmpfp_fixup rewrites the flags on the stack
// so NaN takes the "less than" path (matches the x87 rules above).
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Carry-flag-only consumer: no NaN fixup required.
instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Same as cmpD_cc but with the second operand loaded from memory.
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM: emit_cmpfp3 materializes the three-way result
// from the UCOMISD flags into the integer dst.
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
9641
9642
// x87 double subtract: push src on the FPU stack, then FSUBP into dst.
instruct subDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Subtract with an explicit round-to-double via a store/reload slot
// (RoundDouble forces the 80-bit x87 result down to 64-bit precision).
instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DSUB ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Subtract with the subtrahend loaded directly from memory.
instruct subDPR_reg_mem(regDPR dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// FABS operates in place on the top of the x87 stack, hence the regDPR1
// (ST0-only) operands.
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// FCHS: negate top-of-stack in place.
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
9703
// x87 double add: push src, FADDP into dst.
instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Add with an explicit round-to-double through a stack slot.
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DADD ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Add with the second operand loaded from memory.
instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DADDp $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory
// Load-op-store form: FLD from dst, add src, FST back to the same address.
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D $dst\n\t"
            "DADD ST,$src\n\t"
            "FST_D $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}

// Add of the constant 1.0: FLD1 loads it without a constant-table access.
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// General double constant add. The predicate excludes 0.0 and 1.0, which
// have cheaper dedicated handling (see addDPR_reg_imm1 above).
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// Constant add with round-to-double through a stack slot.
instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
9802 %}
9803
// x87 double multiply: push src, FMULP into dst.
instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (MulD dst src));
  // Cost of 1 makes the matcher prefer this rule for all strict-FP double
  // multiplies over the plain mulDPR_reg above.
  ins_cost(1); // Select this instruction for all FP double multiplies

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Multiply by a constant; 0.0 and 1.0 are excluded by the predicate
// (presumably folded/handled elsewhere — same pattern as addDPR_reg_imm).
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}


// Multiply with the second operand loaded from memory.
instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
9868
9869 //
9870 // Cisc-alternate to reg-reg multiply
// Cisc-alternate to reg-reg multiply: three-operand form where the memory
// operand is loaded, multiplied by src, and popped into dst.
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}


// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
// Same fused shape as above, but the final op is FSUBRP (DE E0+i) so the
// product is subtracted from src2 in the right order.
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}
9918
9919
// x87 double divide: push src, FDIVP into dst.
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
9932
9933 // Strict FP instruction biases argument before division then
9934 // biases result, to avoid double rounding of subnormals.
9935 //
9936 // scale dividend by multiplying dividend by 2^(-15360)
9937 // load divisor
9938 // divide scaled dividend by divisor
9939 // rescale quotient by 2^(15360)
9940 //
// Strict-FP double divide with subnormal bias (see comment block above):
// the dividend is scaled down before the divide and the quotient rescaled
// afterwards so subnormal results are not double-rounded.
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // Single combined predicate (matches strictfp_mulDPR_reg); the previous
  // duplicate bare "predicate(UseSSE<=1)" statement was redundant and
  // dropped the has_method() strict-FP context check.
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  // Cost of 1 makes the matcher prefer this rule over divDPR_reg for all
  // strict-FP double divides.
  ins_cost(1);

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}
9960
// Double remainder (ModD) on the x87 stack. The FPREM loop lives in
// the emitModDPR() encoding class; EAX and EFLAGS are clobbered by the
// FNSTSW/SAHF status polling, hence the KILL effects.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}
9974
// Double remainder for SSE2 doubles: there is no XMM remainder
// instruction, so both operands are spilled through the stack onto the
// x87 unit, FPREM is looped until the C2 status bit clears (the
// "JP loop" in the format), and the result is moved back to an XMM reg.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);   // FNSTSW AX / SAHF status polling

  format %{ "SUB ESP,8\t # DMOD\n"
          "\tMOVSD [ESP+0],$src1\n"
          "\tFLD_D [ESP+0]\n"
          "\tMOVSD [ESP+0],$src0\n"
          "\tFLD_D [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD $dst,[ESP+0]\n"
          "\tADD ESP,8\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}
9999
// AtanD on the x87 stack: push src, then D9 F3 (FPATAN) combines it
// with dst. Two-address form -- result replaces dst.
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);          // FPATAN
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}
10009
// AtanD for SSE2 doubles: no XMM arctangent exists, so the operand is
// spilled to the stack, FPATAN runs on the x87 unit, and the result is
// reloaded into the XMM dst. The ESP adjustments clobber EFLAGS.
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);          // FPATAN
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}
10020
// SqrtD on the x87 stack: push src, FSQRT (D9 FA -- note OpcS is
// emitted before OpcP below), pop into dst.
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);          // secondary-first gives D9 FA = FSQRT
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}
10030
10031 //-------------Float Instructions-------------------------------
10032 // Float Math
10033
10034 // Code for float compare:
10035 // fcompp();
10036 // fwait(); fnstsw_ax();
10037 // sahf();
10038 // movl(dst, unordered_result);
10039 // jcc(Assembler::parity, exit);
10040 // movl(dst, less_result);
10041 // jcc(Assembler::below, exit);
10042 // movl(dst, equal_result);
10043 // jcc(Assembler::equal, exit);
10044 // movl(dst, greater_result);
10045 // exit:
10046
10047 // P6 version of float compare, sets condition codes in EFLAGS
// P6 version of float compare, sets condition codes in EFLAGS
// Uses FUCOMIP so the compare result lands directly in EFLAGS; the
// cmpF_P6_fixup encoding patches the NaN (parity) case to read as
// "less than". EAX is killed by that fixup (MOV AH / SAHF).
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
     "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}
10065
// Cheaper P6 float compare for consumers that only read CF-style
// flags (eFlagsRegUCF): no NaN fixup is required, so just FLD+FUCOMIP.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}
10077
10078
10079 // Compare & branch
// Pre-P6 float compare: FCOMp leaves status in the FPU status word,
// which is pulled into AX (FNSTSW) and transferred to EFLAGS (SAHF).
// The TEST of bit 0x400 (C2) detects the unordered/NaN case, which is
// forced to "less than" before SAHF. Kills EAX.
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
    "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}
10098
10099 // Compare vs zero into -1,0,1
// Compare vs zero into -1,0,1
// Uses FTST (D9 E4) against the immFPR0 operand, then CmpF_Result
// materializes the three-way result into dst. EAX and EFLAGS are
// scratched while converting FPU status to an integer.
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);          // secondary-first gives D9 E4 = FTST
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
10112
10113 // Compare into -1,0,1
// Compare into -1,0,1
// Three-way float compare on the x87 stack: FCOMp sets FPU status,
// CmpF_Result turns it into -1/0/1 in dst (scratching EAX/EFLAGS).
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
10126
10127 // float compare and set condition codes in EFLAGS by XMM regs
// float compare and set condition codes in EFLAGS by XMM regs
// UCOMISS sets ZF/PF/CF directly; emit_cmpfp_fixup() rewrites the
// unordered (PF=1) outcome so NaN compares read as "less than"
// (the PUSHF/AND/POPF sequence shown in the format).
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
    "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);   // canonicalize the NaN/unordered case
  %}
  ins_pipe( pipe_slow );
%}
10144
// XMM float compare for CF-only flag consumers: bare UCOMISS with no
// NaN fixup, hence the lower cost than cmpF_cc.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
10155
10156 // float compare and set condition codes in EFLAGS by XMM regs
// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand variant of cmpF_cc: folds the LoadF into the
// UCOMISS, then applies the same NaN fixup.
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
    "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);   // canonicalize the NaN/unordered case
  %}
  ins_pipe( pipe_slow );
%}
10173
// Memory-operand XMM float compare for CF-only flag consumers:
// no NaN fixup needed.
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}
10184
10185 // Compare into -1,0,1 in XMM
// Compare into -1,0,1 in XMM
// Three-way compare: UCOMISS then emit_cmpfp3() materializes
// -1 (below or unordered), 0 (equal) or 1 (above) into dst.
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10204
10205 // Compare into -1,0,1 in XMM and memory
// Compare into -1,0,1 in XMM and memory
// Memory-operand variant of cmpF_reg: folds the LoadF into UCOMISS.
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10224
10225 // Spill to obtain 24-bit precision
// Spill to obtain 24-bit precision
// Float subtract in 24-bit mode: the result is stored to a stack slot
// (Pop_Mem_FPR) so the memory store rounds it to single precision.
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );          // store-to-memory does the rounding
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// Register-to-register float subtract (two-address): dst = dst - src.
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10249
10250 // Spill to obtain 24-bit precision
// Spill to obtain 24-bit precision
// Float add in 24-bit mode: round by storing the result to a stack slot.
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );          // store-to-memory does the rounding
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// Register-to-register float add (two-address): dst = dst + src.
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10275
// Float absolute value: FABS (D9 E1) operates in place on the top of
// the FPU stack, so both operands are constrained to FPR1.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);          // secondary-first gives D9 E1 = FABS
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Float negate: FCHS (D9 E0) flips the sign of the top of the FPU
// stack in place; operands constrained to FPR1 like absFPR_reg.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);          // secondary-first gives D9 E0 = FCHS
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
10295
10296 // Cisc-alternate to addFPR_reg
10297 // Spill to obtain 24-bit precision
// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
// Folds the LoadF into an FLD (tertiary opcode 0xD9), adds src1, and
// rounds via the store to the stack slot.
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),   // FLD_S from memory
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
// Two-address add with a folded memory operand: dst = dst + [src].
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),    // FLD_S from memory
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
10324
// Following two instructions are used for the _222_mpegaudio benchmark.
10326 // Spill to obtain 24-bit precision
// 24-bit add with the memory operand on the left: load src1 from
// memory, add the register src2, round via the stack-slot store.
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),   // FLD_S src1
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
// Both operands come from memory: src2 is loaded, src1 is added via a
// memory-form FADD (secondary /digit), result rounded to the stack slot.
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
// Same mem+mem shape as the cisc variant but matching a plain AddF of
// two (spilled) memory operands.
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}
10367
10368
10369 // Spill to obtain 24-bit precision
// Spill to obtain 24-bit precision
// Add a float constant (fetched from the constant table) and round by
// storing single-precision to the stack slot.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));   // single-precision store rounds
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
// Same add-constant, but the result stays on the FPU stack (fstp_d to
// a stack register) so no 24-bit rounding occurs.
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);      // pop into dst register, no rounding
  %}
  ins_pipe(fpu_reg_reg_con);
%}
10398
10399 // Spill to obtain 24-bit precision
// Spill to obtain 24-bit precision
// Float multiply in 24-bit mode: round via the stack-slot store.
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );          // store-to-memory does the rounding
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// Three-operand float multiply, result popped into the dst register.
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}
10428
10429
10430 // Spill to obtain 24-bit precision
10431 // Cisc-alternate to reg-reg multiply
// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
// Folds the LoadF into an FLD_S, multiplies by src1, rounds via the
// stack-slot store.
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),   // FLD_S from memory
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
// Same folded-load multiply but the result is popped into a register.
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),   // FLD_S from memory
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
10459
10460 // Spill to obtain 24-bit precision
// Spill to obtain 24-bit precision
// Both multiply operands in memory: load src2, FMUL src1 from memory
// (secondary /digit form), round via the stack-slot store.
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}
10473
10474 // Spill to obtain 24-bit precision
// Spill to obtain 24-bit precision
// Multiply by a float constant from the constant table, rounding via
// the single-precision stack-slot store.
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));   // single-precision store rounds
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
// Same multiply-by-constant; result stays on the FPU stack unrounded.
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);      // pop into dst register, no rounding
  %}
  ins_pipe(fpu_reg_reg_con);
%}
10505
10506
10507 //
10508 // MACRO1 -- subsume unshared load into mulFPR
10509 // This instruction does not round to 24-bits
//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
// Matches MulF whose left input is an unshared LoadF, so the load is
// folded into the FLD that starts the multiply.
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),   // FLD from memory
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
// Fused load+multiply+add: (mem1 * src1) + src2, all on the FPU stack.
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);                // cheap enough to beat the separate instructions

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}
10542
10543 // MACRO3 -- addFPR a mulFPR
10544 // This instruction does not round to 24-bits. It is a '2-address'
10545 // instruction in that the result goes back to src2. This eliminates
10546 // a move from the macro; possibly the register allocator will have
10547 // to add it back (and maybe not).
// Fused multiply-add macro (see MACRO3 comment above): computes
// src2 = (src0 * src1) + src2. Note the 2-address form -- the result
// is written back into the src2 operand, not a separate dst.
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );        // pop-form add writes src2
  ins_pipe( fpu_reg_reg_reg );
%}
10561
10562 // MACRO4 -- divFPR subFPR
10563 // This instruction does not round to 24-bits
// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
// Fused subtract+divide: dst = (src2 - src1) / src3, kept entirely on
// the FPU stack via the subFPR_divFPR_encode encoding class.
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}
10578
10579 // Spill to obtain 24-bit precision
// Spill to obtain 24-bit precision
// Float divide in 24-bit mode: round via the stack-slot store.
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );          // store-to-memory does the rounding
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// Register float divide (two-address): dst = dst / src.
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10603
10604
10605 // Spill to obtain 24-bit precision
// Spill to obtain 24-bit precision
// Float remainder in 24-bit mode: reuses the double-precision FPREM
// helper (emitModDPR) and rounds by storing the result to a stack slot.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));           // store-to-memory does the rounding
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
// Two-address float remainder: dst = dst % src, result left in a
// register (no rounding store).
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}
10632
// Float remainder for SSE floats: no XMM remainder instruction exists,
// so operands are spilled through the stack to the x87 unit, FPREM is
// looped until complete (the "JP loop" in the format), and the result
// is moved back to the XMM dst.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);   // FNSTSW AX / SAHF status polling
  format %{ "SUB ESP,4\t # FMOD\n"
          "\tMOVSS [ESP+0],$src1\n"
          "\tFLD_S [ESP+0]\n"
          "\tMOVSS [ESP+0],$src0\n"
          "\tFLD_S [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS $dst,[ESP+0]\n"
          "\tADD ESP,4\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}
10656
10657
//----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alphabetically sorted. Please keep it that way!
10660
// RoundFloat: force an x87 register down to single precision by
// storing it to a stack slot (the store performs the rounding).
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// RoundDouble: force an x87 register (80-bit internally) down to
// double precision via a 64-bit store to a stack slot.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
10678
10679 // Force rounding to 24-bit precision and 6-bit exponent
// Force rounding to 24-bit precision and 6-bit exponent
// ConvD2F on the x87 stack: expands to roundFloat_mem_reg, i.e. the
// rounding happens via the single-precision store to the stack slot.
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}
10688
10689 // Force rounding to 24-bit precision and 6-bit exponent
// ConvD2F when the source is on the x87 stack but floats live in XMM
// (UseSSE==1): round by storing single-precision to a scratch stack
// word, then reload into the XMM dst. EFLAGS killed by the ESP adjusts.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      // src is not at top-of-stack: load it to TOS first, then pop-store.
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      // src already at TOS: store without disturbing the stack.
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}
10711
10712 // Force rounding double precision to single precision
// Force rounding double precision to single precision
// Pure-XMM ConvD2F: a single CVTSD2SS does the rounding.
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
10722
// ConvF2D when both operands are x87 stack registers: widening is a
// plain register-to-register move on the FPU stack.
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// ConvF2D with UseSSE==1 (doubles still on the x87 stack, result goes
// to a stack slot): expands to a double-precision rounding store.
instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}
10739
// ConvF2D from an XMM float to an x87 double (UseSSE==1): spill the
// float to a scratch stack word, FLD_S it onto the FPU stack, and pop
// it into dst. EFLAGS killed by the ESP adjustments.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));   // widening happens on the load
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}
10758
// Pure-XMM ConvF2D: a single CVTSS2SD performs the widening.
instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
10768
10769 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// x87 path: FIST with truncating rounding mode; the 0x80000000 result
// marks overflow/NaN and diverts to the d2i_wrapper slow path (per the
// format). Result is pinned to EAX; EDX is scratched.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}
10788
10789 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// SSE2 path: CVTTSD2SI does the truncating convert; the 0x80000000
// sentinel result signals overflow/NaN, in which case the value is
// reloaded on the x87 stack and d2i_wrapper handles the corner cases.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);   // sentinel => maybe NaN/overflow
    __ jccb(Assembler::notEqual, fast);
    // Slow path: pass the operand to the wrapper on the x87 stack.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}
10818
// ConvD2L, x87 path: FIST a 64-bit integer in truncating mode into a
// stack temp, pop it into EDX:EAX, and divert to d2l_wrapper when the
// result is the min-long sentinel (EDX==0x80000000 && EAX==0), which
// flags overflow/NaN (per the format). Result pinned to EDX:EAX.
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
10840
10841 // XMM lacks a float/double->long conversion, so use the old FPU stack.
// XMM lacks a float/double->long conversion, so use the old FPU stack.
// The XMM value is spilled, FISTed in truncating rounding mode, and
// the min-long sentinel (EDX:EAX == 0x80000000:0) diverts to the
// d2l_wrapper slow path for NaN/overflow handling.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));   // truncating 64-bit store
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);      // sentinel high word?
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);            // ...and zero low word => slow path
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));     // wrapper takes the operand on the FPU stack
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}
10894
10895 // Convert a double to an int. Java semantics require we do complex
10896 // manglations in the corner cases. So we set the rounding mode to
10897 // 'zero', store the darned double down as an int, and reset the
10898 // rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
// ConvF2I, x87 path (see comment above): truncating FIST into a stack
// temp, sentinel check, slow-path call to d2i_wrapper on NaN/overflow.
// Result pinned to EAX; EDX scratched.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}
10920
// Convert a float in xmm to an int reg.
// CVTTSS2SI returns 0x80000000 for NaN/overflow; only then do we round-trip
// the value through the x87 stack and call the d2i wrapper (which handles
// floats as well) for exact Java semantics.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: pass $src to the wrapper on the x87 stack (spill via ESP).
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}
10950
// x87-only (UseSSE==0) float->long.  Result lands in EDX:EAX (eADXRegL).
// A stored value of 0x80000000:00000000 is the FIST "integer indefinite"
// pattern for overflow/NaN, so that exact pair diverts to d2l_wrapper.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
10973
// XMM lacks a float/double->long conversion, so use the old FPU stack.
// The float is spilled from xmm to the stack, loaded onto the x87 stack,
// stored as a long with truncating rounding, then checked for the
// 0x80000000:00000000 indefinite pattern (overflow/NaN -> d2l_wrapper).
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    // Truncating (round-toward-zero) store, per Java narrowing semantics.
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Indefinite result: reload the original float and let the wrapper
    // produce the Java-specified saturation/zero result.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}
11027
// x87 int->double: FILD the stack-slot int, pop into the FPR.  Exact in
// double precision, so no rounding step is needed.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11037
// SSE2 int->double via CVTSI2SD (used unless UseXmmI2D selects the
// MOVD+CVTDQ2PD variant below).
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11047
// SSE2 int->double folding the load: converts directly from memory.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
11057
// UseXmmI2D variant: move the int into an xmm lane and use the packed
// CVTDQ2PD conversion (avoids the partial-register/merge behavior of
// CVTSI2SD on some CPUs — presumably why the flag exists; see VM_Version).
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11071
// x87 int->double folding the load; excluded in 24-bit mode where results
// must be rounded through memory.
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11082
// Convert a byte to a float; no rounding step needed.
// Matches only ConvI2F of (AndI x 255): an 8-bit value is always exactly
// representable in a float, so the 24-bit rounding variants are bypassed.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11094
// In 24-bit mode, force exponent rounding by storing back out
// (FSTP_S to a stack slot rounds the result to single precision).
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}
11107
// In 24-bit mode, force exponent rounding by storing back out
// (same as convI2FPR_SSF but folds the integer load from memory).
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}
11120
// This instruction does not round to 24-bits
// (standard-precision path when the compile is not in 24-bit FP mode).
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11132
// This instruction does not round to 24-bits
// (memory-folded form of convI2FPR_reg).
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11144
// Convert an int to a float in xmm; no rounding step needed.
// Predicate parses as: UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) — i.e. all
// SSE levels except when the MOVD+CVTDQ2PS variant below is selected.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11155
// UseXmmI2F variant of int->float: MOVD then packed CVTDQ2PS, mirroring
// convXI2D_reg above.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11169
// Sign-extend int to long: copy into both halves, then arithmetic
// shift-right by 31 fills the high half with the sign bit (KILLs flags).
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}
11180
// Zero-extend convert int to long
// Matches (AndL (ConvI2L x) 0xFFFFFFFF): copy the low half and XOR-clear
// the high half, which is cheaper than sign-extend + mask.
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}
11192
// Zero-extend long
// (AndL x 0xFFFFFFFF) on an existing long: copy low half, clear high half.
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}
11204
// x87 long->double: push both 32-bit halves, FILD the 64-bit integer from
// the stack, and round through the destination stack slot ("D-round").
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}
11218
// SSE2 long->double: no direct xmm conversion on 32-bit, so go through the
// x87 stack (FILD_D) and transfer the result back to xmm via memory.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}
11233
// SSE long->float: same x87 round-trip as convL2D_reg but stores single
// precision (FSTP_S), which also performs the required float rounding.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}
11248
// x87 long->float into a stack slot ("F-round" via FSTP_S).
// NOTE(review): no UseSSE predicate here — presumably the matcher prefers
// convL2F_reg when UseSSE>=1; confirm selection rules before relying on it.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
11261
// long->int is just a copy of the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}
11269
// Bit-copy a float stack slot into an int register (Float.floatToRawIntBits
// style reinterpretation — no conversion, just a 32-bit load).
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}
11280
// Bit-copy an x87 float register to an int stack slot (FST_S).
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
11291
// Bit-copy an xmm float to an int stack slot via MOVSS.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11304
// Cheapest form (SSE2): MOVD the xmm bits straight into a GPR, no memory.
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11316
// Bit-copy an int register into a float stack slot (intBitsToFloat style).
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
11328
11329
// Bit-copy an int stack slot into an x87 float register (FLD m32real).
// RMopc_Mem_no_oop: the slot holds raw bits, never an oop.
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
11343
// Bit-copy an int stack slot into an xmm float via MOVSS.
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}
11356
// Cheapest form (SSE2): MOVD a GPR straight into an xmm register.
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11369
// Bit-copy a double stack slot into a long register pair: two 32-bit MOV
// loads ($src for the low half, $src+4 for the high half).
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}
11381
// Bit-copy an x87 double register to a long stack slot (FST_D).
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
11392
// Bit-copy an xmm double to a long stack slot via MOVSD.
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11404
// Register-only double->long bit-copy: MOVD extracts the low 32 bits,
// PSHUFLW 0x4E swaps the 16-bit word pairs to bring the high 32 bits of the
// double down to lane 0 in $tmp, then a second MOVD extracts them.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11420
// Bit-copy a long register pair into a double stack slot: two 32-bit MOV
// stores ($dst gets the low half, $dst+4 the high half).
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}
11432
11433
// Bit-copy a long stack slot into an x87 double register (FLD m64real).
// RMopc_Mem_no_oop: the slot holds raw bits, never an oop.
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
11447
11448
// Bit-copy a long stack slot into an xmm double.  With
// UseXmmLoadAndClearUpper, movdbl emits MOVSD (clears the upper half).
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}
11461
// Same as above without UseXmmLoadAndClearUpper: movdbl emits MOVLPD,
// which merges into the existing upper half instead of clearing it
// (a partial-register update — cheaper on some CPUs).
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}
11474
// Register-only long->double bit-copy: MOVD each 32-bit half into an xmm
// register, then PUNPCKLDQ interleaves them into one 64-bit value.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11490
11491 //----------------------------- CompressBits/ExpandBits ------------------------
11492
// 64-bit CompressBits (Long.compress) on a 32-bit target using two 32-bit
// PEXT operations (BMI2).  The two partial results are then stitched
// together: the upper result is shifted into place just above the bits
// produced by the lower 32-bit extraction.
// Fixed register pairs (EDX:EAX, ECX:EBX, EBP:EDI) are dictated by the
// operand classes; $xtmp spills $rtmp because GPRs are scarce on x86_32.
instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src mask));
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    Label exit, partial_result;
    // In parallel, extract both upper and lower 32 bits of source into destination register pair.
    // Merge the results of upper and lower destination registers such that upper destination
    // results are contiguously laid out after the lower destination result.
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
    __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
    __ popcntl($rtmp$$Register, $mask$$Register);
    // Skip merging if bit count of lower mask register is equal to 32 (register size).
    __ cmpl($rtmp$$Register, 32);
    __ jccb(Assembler::equal, exit);
    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift left the contents of upper destination register by true bit count of lower mask register
    // and merge with lower destination register.
    __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ orl($dst$$Register, $rtmp$$Register);
    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    // Zero out upper destination register if true bit count of lower 32 bit mask is zero
    // since contents of upper destination have already been copied to lower destination
    // register.
    __ cmpl($rtmp$$Register, 0);
    __ jccb(Assembler::greater, partial_result);
    __ movl(HIGH_FROM_LOW($dst$$Register), 0);
    __ jmp(exit);
    __ bind(partial_result);
    // Perform right shift over upper destination register to move out bits already copied
    // to lower destination register.
    __ subl($rtmp$$Register, 32);
    __ negl($rtmp$$Register);
    __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ bind(exit);
  %}
  ins_pipe( pipe_slow );
%}
11533
// 64-bit ExpandBits (Long.expand) on a 32-bit target using 32-bit PDEP
// operations (BMI2).  The lower mask's true-bit count determines how many
// source bits the lower deposit consumed; leftover low source bits plus the
// upper source half are deposited under the upper mask.
instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src mask));
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    // Extraction operation sequentially reads the bits from source register starting from LSB
    // and lays them out into destination register at bit locations corresponding to true bits
    // in mask register. Thus number of source bits read are equal to combined true bit count
    // of mask register pair.
    Label exit, mask_clipping;
    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
    __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
    __ popcntl($rtmp$$Register, $mask$$Register);
    // If true bit count of lower mask register is 32 then none of bit of lower source register
    // will feed to upper destination register.
    __ cmpl($rtmp$$Register, 32);
    __ jccb(Assembler::equal, exit);
    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift right the contents of lower source register to remove already consumed bits.
    __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
    // Extract the bits from lower source register starting from LSB under the influence
    // of upper mask register.
    __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
    // rtmp := 32 - popcnt(lower mask) = number of low source bits left over,
    // i.e. how many set bits of the upper mask were consumed above.
    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    __ subl($rtmp$$Register, 32);
    __ negl($rtmp$$Register);
    // Preserve the lower mask in xmm while we destructively clip the upper mask.
    __ movdl($xtmp$$XMMRegister, $mask$$Register);
    __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
    // Clear the set bits in upper mask register which have been used to extract the contents
    // from lower source register.
    __ bind(mask_clipping);
    __ blsrl($mask$$Register, $mask$$Register);  // clear lowest set bit
    __ decrementl($rtmp$$Register, 1);
    __ jccb(Assembler::greater, mask_clipping);
    // Starting from LSB extract the bits from upper source register under the influence of
    // remaining set bits in upper mask register.
    __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
    // Merge the partial results extracted from lower and upper source register bits.
    __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ movdl($mask$$Register, $xtmp$$XMMRegister);
    __ bind(exit);
  %}
  ins_pipe( pipe_slow );
%}
11580
11581 // =======================================================================
11582 // fast clearing of an array
11583 // Small ClearArray non-AVX512.
// Small ClearArray, non-AVX512 (UseAVX <= 2): zero $cnt doublewords at
// $base.  Actual strategy (inline short loop vs. REP STOS vs. XMM loop) is
// chosen at emit time by MacroAssembler::clear_mem; the format template
// mirrors those alternatives for disassembly output only.
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large = false, no AVX512 mask register.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11642
11643 // Small ClearArray AVX512 non-constant length.
// Small ClearArray AVX512 non-constant length.
// Same as rep_stos but for UseAVX > 2: supplies an AVX512 opmask register
// ($ktmp) so clear_mem can use masked vector stores for the tail.
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large = false, with an AVX512 mask register for masked tail stores.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11703
11704 // Large ClearArray non-AVX512.
// Large ClearArray non-AVX512.
// is_large() variant: skips the short-size inline loop and goes straight
// to REP STOS / XMM bulk clearing (is_large = true below).
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large = true, no AVX512 mask register.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11753
11754 // Large ClearArray AVX512.
// Large ClearArray AVX512.
// is_large() variant with an AVX512 opmask register for masked tail stores.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large = true, with an AVX512 mask register.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11803
// Small ClearArray AVX512 constant length.
// Non-large clear whose element count is a compile-time immediate; requires
// AVX-512 VL/BW so clear_mem can use masked stores. Low ins_cost(100) makes
// the matcher prefer this form over the variable-count variants.
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
              ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    // cnt is an immI operand, so its constant value is passed directly.
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
11818
// Compare two byte[] (Latin-1/LL encoding) strings.
// Non-AVX-512 path: knoreg tells string_compare no mask register is available.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11833
// AVX-512 VL/BW variant of string_compareL: same contract, but supplies a
// kReg temp so string_compare can use masked vector loads.
instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11848
// Compare two char[] (UTF-16/UU encoding) strings; non-AVX-512 path.
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11863
// AVX-512 VL/BW variant of string_compareU (UTF-16/UU), with kReg mask temp.
instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11878
// Mixed-encoding compare: str1 is Latin-1 (byte[]), str2 is UTF-16 (LU).
// Non-AVX-512 path.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11893
// AVX-512 VL/BW variant of string_compareLU (Latin-1 vs UTF-16).
instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11908
// Mixed-encoding compare with str1 UTF-16 and str2 Latin-1 (UL).
// Note the operands are passed to string_compare swapped (str2/cnt2 first),
// and the register assignment is mirrored vs. the LU variant accordingly.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11923
// AVX-512 VL/BW variant of string_compareUL; operands swapped as in the
// non-evex UL form.
instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11938
// fast string equals
// Byte-wise equality via arrays_equals; first arg 'false' = StrEquals form
// (length in elements, no array-header handling), last 'false' = byte data.
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe( pipe_slow );
%}
11955
// AVX-512 VL/BW variant of string_equals, with kReg mask temp.
instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}
11971
11972
// fast search of substring with known size.
// Latin-1 (LL) indexOf with a constant needle length: lengths >= 16 bytes
// take the C8 variant that keeps the needle in registers.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}
12000
// fast search of substring with known size.
// UTF-16 (UU) indexOf with a constant needle length: the C8 cutover is
// 8 elements here (8 chars = 16 bytes, same register budget as LL's 16).
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}
12028
// fast search of substring with known size.
// Mixed UL indexOf (UTF-16 haystack, Latin-1 needle) with constant length;
// same 8-element cutover as the UU variant.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}
12056
// Latin-1 (LL) indexOf with a runtime needle length; (-1) marks the length
// as unknown so string_indexof takes the general path.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
12072
// UTF-16 (UU) indexOf with a runtime needle length ((-1) = unknown count).
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}
12088
// Mixed UL indexOf with a runtime needle length ((-1) = unknown count).
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}
12104
// indexOf of a single char in a UTF-16 (U) string.
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                             eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12117
// indexOf of a single char in a Latin-1 (L) string.
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12130
12131
// fast array equals
// byte[] equality; first arg 'true' = AryEq form (lengths read from the
// array headers), last 'false' = byte elements. Non-AVX-512 path.
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12149
// AVX-512 VL/BW variant of array_equalsB, with kReg mask temp.
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12166
// char[] equality (UU encoding, 'true /* char */'); non-AVX-512 path.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12183
// AVX-512 VL/BW variant of array_equalsC, with kReg mask temp.
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12200
// Count leading non-negative bytes in a byte[] (CountPositives).
// Fallback path when AVX-512 VL/BW + BMI2 are not both available; passes
// knoreg twice since no mask registers can be used.
instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                         regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12216
// AVX-512 VL/BW + BMI2 variant of count_positives, with two kReg mask temps.
instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
                              regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12232
12233
// fast char[] to byte[] compression
// StrCompressedCopy: narrows UTF-16 chars to Latin-1 bytes where possible;
// result comes back in EAX. Fallback path (no AVX-512 VL/BW + BMI2).
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                         regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12250
// AVX-512 VL/BW + BMI2 variant of string_compress, with two kReg mask temps.
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                              regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12266
// fast byte[] to char[] inflation
// StrInflatedCopy: widens Latin-1 bytes to UTF-16 chars; produces no value
// (Universe dummy). Fallback path (no AVX-512 VL/BW + BMI2).
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12281
// AVX-512 VL/BW + BMI2 variant of string_inflate, with kReg mask temp.
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                             regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12295
// encode char[] to byte[] in ISO_8859_1
// The trailing 'false' selects ISO-8859-1 (not ASCII) mode in
// encode_iso_array; paired with encode_ascii_array below via the
// is_ascii() predicate split.
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}
12312
// encode char[] to byte[] in ASCII
// Same helper as encode_iso_array but the trailing 'true' selects ASCII
// mode (7-bit limit instead of 8-bit).
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                            regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                            eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}
12329
12330 //----------Control Flow Instructions------------------------------------------
12331 // Signed compare Instructions
// Signed int compare, register-register: CMP r32,r32 sets the signed flags.
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}
12340
// Signed int compare, register-immediate; OpcSErm/Con8or32 picks the short
// sign-extended 8-bit form when the immediate fits.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12350
// Cisc-spilled version of cmpI_eReg
// Compares a register against an int loaded from memory in one instruction.
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}
12361
// Compare against zero via TEST src,src — shorter than CMP with a zero
// immediate and sets the same flags for eq/lt/gt tests.
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12371
// Fold (AndI src con) == 0 into a single TEST r32,imm32 — the AND result
// itself is not needed, only the flags.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}
12380
// Fold (AndI src mem) == 0 into TEST r32,m32; only the flags are produced.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}
12389
12390 // Unsigned compare Instructions; really, same as signed except they
12391 // produce an eFlagsRegU instead of eFlagsReg.
// Unsigned int compare, register-register: same CMP encoding as the signed
// form, but produces eFlagsRegU so branches use unsigned conditions.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}
12400
// Unsigned int compare, register-immediate (short form when imm fits in 8 bits).
instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12409
// // Cisc-spilled version of cmpU_eReg
// Unsigned compare of a register against an int loaded from memory.
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}
12420
12421 // // Cisc-spilled version of cmpU_eReg
12422 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12423 // match(Set cr (CmpU (LoadI op1) op2));
12424 //
12425 // format %{ "CMPu $op1,$op2" %}
12426 // ins_cost(500);
12427 // opcode(0x39); /* Opcode 39 /r */
12428 // ins_encode( OpcP, RegMem( op1, op2) );
12429 //%}
12430
// Unsigned compare against zero via TEST src,src.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12439
// Unsigned pointer compare Instructions
// Pointers are compared unsigned (addresses have no sign).
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}
12449
// Unsigned pointer compare, register-immediate.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12458
// // Cisc-spilled version of cmpP_eReg
// Unsigned pointer compare of a register against a pointer loaded from memory.
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}
12469
12470 // // Cisc-spilled version of cmpP_eReg
12471 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12472 // match(Set cr (CmpP (LoadP op1) op2));
12473 //
12474 // format %{ "CMPu $op1,$op2" %}
12475 // ins_cost(500);
12476 // opcode(0x39); /* Opcode 39 /r */
12477 // ins_encode( OpcP, RegMem( op1, op2) );
12478 //%}
12479
// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
// The predicate checks the loaded pointer has no relocation info, i.e. it
// is not an oop the GC could move.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}
12492
12493 //
12494 // This will generate a signed flags result. This should be ok
12495 // since any compare to a zero should be eq/neq.
// Pointer null-check: TEST reg,reg (85 /r) sets ZF iff the pointer is null.
// Produces signed flags (eFlagsReg), which is fine because a compare against
// zero is only consumed as eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12504
12505 // Cisc-spilled version of testP_reg
12506 // This will generate a signed flags result. This should be ok
12507 // since any compare to a zero should be eq/neq.
// Null-check of a pointer loaded from memory, without a temp register:
// TEST m32,0xFFFFFFFF (F7 /0) sets ZF iff the loaded word is zero.
// NOTE(review): 'zero' is declared immI_0 here while testP_reg above uses
// immP0 -- presumably historical; confirm against ADLC type matching before
// changing.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}
12517
12518 // Yanked all unsigned pointer compare operations.
12519 // Pointer compares are done with CmpP which is already unsigned.
12520
12521 //----------Max and Min--------------------------------------------------------
12522 // Min Instructions
12523 ////
12524 // *** Min and Max using the conditional move are slower than the
12525 // *** branch version on a Pentium III.
12526 // // Conditional move for min
12527 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12528 // effect( USE_DEF op2, USE op1, USE cr );
12529 // format %{ "CMOVlt $op2,$op1\t! min" %}
12530 // opcode(0x4C,0x0F);
12531 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12532 // ins_pipe( pipe_cmov_reg );
12533 //%}
12534 //
12535 //// Min Register with Register (P6 version)
12536 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12537 // predicate(VM_Version::supports_cmov() );
12538 // match(Set op2 (MinI op1 op2));
12539 // ins_cost(200);
12540 // expand %{
12541 // eFlagsReg cr;
12542 // compI_eReg(cr,op1,op2);
12543 // cmovI_reg_lt(op2,op1,cr);
12544 // %}
12545 //%}
12546
12547 // Min Register with Register (generic version)
// Integer minimum, generic (branch-based) version: dst = min(dst, src).
// Clobbers the flags; min_enc supplies the compare+branch/cmov sequence, so
// the 0xCC opcode here is a placeholder that is never emitted.
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}
12558
12559 // Max Register with Register
12560 // *** Min and Max using the conditional move are slower than the
12561 // *** branch version on a Pentium III.
12562 // // Conditional move for max
12563 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12564 // effect( USE_DEF op2, USE op1, USE cr );
12565 // format %{ "CMOVgt $op2,$op1\t! max" %}
12566 // opcode(0x4F,0x0F);
12567 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12568 // ins_pipe( pipe_cmov_reg );
12569 //%}
12570 //
12571 // // Max Register with Register (P6 version)
12572 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12573 // predicate(VM_Version::supports_cmov() );
12574 // match(Set op2 (MaxI op1 op2));
12575 // ins_cost(200);
12576 // expand %{
12577 // eFlagsReg cr;
12578 // compI_eReg(cr,op1,op2);
12579 // cmovI_reg_gt(op2,op1,cr);
12580 // %}
12581 //%}
12582
12583 // Max Register with Register (generic version)
// Integer maximum, generic (branch-based) version: dst = max(dst, src).
// Clobbers the flags; max_enc supplies the real encoding, 0xCC is a
// placeholder opcode that is never emitted.
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}
12594
12595 // ============================================================================
12596 // Counted Loop limit node which represents exact final iterator value.
12597 // Note: the resulting value should fit into integer range since
12598 // counted loops have limit check on overflow.
// Compute the exact final iterator value of a counted loop:
//   limit = init + stride * ((limit - init + stride - sign(stride)) / stride)
// The subtraction is done in 64 bits (EDX:EAX / tmp:init pairs) to avoid
// overflow; the quotient is then truncated back to 32 bits. Fixed registers:
// limit must be EAX and limit_hi EDX because CDQ/IDIV/MUL implicitly use them.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    // Strides of +/-1 are strength-reduced elsewhere; this instruct only
    // handles the general case.
    assert(strd != 1 && strd != -1, "sanity");
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      // Negative stride: bias toward zero, then negate the 64-bit value so
      // the division below can use the positive stride.
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12641
12642 // ============================================================================
12643 // Branch Instructions
12644 // Jump Table
// Jump table dispatch: indirect jump through the constant table, indexed by
// switch_val (byte offsets, hence times_1 scaling).
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
12656
12657 // Jump Direct - Label defines a relative address from JMP+1
// Unconditional direct jump, long (rel32) form; fixed 5-byte size so the
// short-branch replacement pass can compute offsets. See jmpDir_short for the
// 2-byte variant.
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP    $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}
12671
12672 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional direct jump on signed flags, long (0F 8x rel32) form; fixed
// 6-byte size for the short-branch replacement pass.
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop    $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}
12686
12687 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional back-branch closing a counted loop; identical encoding to
// jmpCon but matches CountedLoopEnd so the loop structure is preserved.
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop    $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}
12701
12702 // Jump Direct Conditional - using unsigned comparison
// Conditional direct jump using unsigned-comparison flags (eFlagsRegU);
// long form, fixed 6 bytes.
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u  $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
12716
// Conditional jump on unordered-compare flags (eFlagsRegUCF, from FP compares
// where only CF/ZF are meaningful). Cheaper than jmpConUCF2 because the
// condition needs no parity fix-up.
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u  $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
12730
// Conditional jump on unordered-compare flags for eq/ne tests, which need a
// parity (PF) fix-up: an unordered FP compare sets PF, making the raw ZF test
// wrong. For ne, unordered must take the branch; for eq, unordered must fall
// through (skip via a local 'done' label).
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u   $labl\n\t"
      $$emit$$"J$cop,u   $labl"
    } else {
      $$emit$$"JP,u   done\n\t"
      $$emit$$"J$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // Unordered (PF set) counts as "not equal": branch on parity too.
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Unordered must NOT be treated as equal: skip over the JE.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
12762
12763 // ============================================================================
12764 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
12765 // array for an instance of the superklass. Set a hidden internal cache on a
12766 // hit (cache is checked with exposed code in gen_subtype_check()). Return
12767 // NZ for a miss or zero for a hit. The encoding ALSO sets flags.
// Slow-path subtype check: scan sub's secondary-supers array for super using
// REPNE SCASD. Fixed registers (EDI result, ESI sub, EAX super, ECX counter)
// are dictated by the string-scan instruction. Returns zero in EDI on a hit
// (and updates the secondary_super_cache), non-zero on a miss; also sets
// flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR    $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}
12786
// Variant of partialSubtypeCheck whose result is only compared against zero:
// the flags from the scan are the answer (Z = hit), so the XOR of EDI is
// skipped (opcode 0x0) and EDI is merely killed, not defined as a result.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s  miss\t\t# Missed: flags NZ\n\t"
            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0);  // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}
12804
12805 // ============================================================================
12806 // Branch Instructions -- short offset versions
12807 //
12808 // These instructions are used to replace jumps of a long offset (the default
12809 // match) with jumps of a shorter offset. These instructions are all tagged
12810 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12811 // match rules in general matching. Instead, the ADLC generates a conversion
12812 // method in the MachNode which can be used to do in-place replacement of the
12813 // long variant with the shorter variant. The compiler will determine if a
12814 // branch can be taken by the is_short_branch_offset() predicate in the machine
12815 // specific code section of the file.
12816
12817 // Jump Direct - Label defines a relative address from JMP+1
// Short (2-byte, rel8) form of jmpDir. Tagged ins_short_branch(1): never
// matched directly, substituted in place of the long form when the branch
// offset fits in 8 bits.
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s  $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}
12832
12833 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short (2-byte) form of jmpCon; substituted by the short-branch pass when
// the offset fits in rel8.
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s  $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}
12848
12849 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short (2-byte) form of jmpLoopEnd for counted-loop back-branches.
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s  $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}
12864
12865 // Jump Direct Conditional - using unsigned comparison
// Short (2-byte) form of jmpConU (unsigned-comparison flags).
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}
12880
// Short (2-byte) form of jmpConUCF (unordered-compare flags, no parity
// fix-up needed).
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}
12895
// Short form of jmpConUCF2 (eq/ne on unordered-compare flags with parity
// fix-up). Two jccb's = 4 bytes; same unordered handling as the long form.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s   $labl\n\t"
      $$emit$$"J$cop,u,s   $labl"
    } else {
      $$emit$$"JP,u,s  done\n\t"
      $$emit$$"J$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // Unordered (PF) counts as "not equal".
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Unordered must not be treated as equal.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12929
12930 // ============================================================================
12931 // Long Compare
12932 //
12933 // Currently we hold longs in 2 registers. Comparing such values efficiently
12934 // is tricky. The flavor of compare used depends on whether we are testing
12935 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
12936 // The GE test is the negated LT test. The LE test can be had by commuting
12937 // the operands (yielding a GE test) and then negating; negate again for the
12938 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the
12939 // NE test is negated from that.
12940
12941 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12942 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
12943 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections
12944 // are collapsed internally in the ADLC's dfa-gen code. The match for
12945 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12946 // foo match ends up with the wrong leaf. One fix is to not match both
12947 // reg-reg and reg-zero forms of long-compare. This is unfortunate because
12948 // both forms beat the trinary form of long-compare and both are very useful
12949 // on Intel which has so few registers.
12950
12951 // Manifest a CmpL result in an integer register. Very painful.
12952 // This is the test to avoid.
// Manifest a three-way long compare (-1/0/+1) in an integer register.
// High halves are compared signed; low halves unsigned ('below'), which is
// the correct lexicographic order for two-register longs. Expensive -- this
// is the form to avoid when a flags-only compare suffices.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR    $dst,$dst\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
            "JLT,s  m_one\n\t"
            "JGT,s  p_one\n\t"
            "CMP    $src1.lo,$src2.lo\n\t"
            "JB,s   m_one\n\t"
            "JEQ,s  done\n"
    "p_one:\tINC    $dst\n\t"
            "JMP,s  done\n"
    "m_one:\tDEC    $dst\n"
    "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // Signed compare of the high words decides unless they are equal.
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // High words equal: unsigned compare of the low words decides.
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}
12986
12987 //======
12988 // Manifest a CmpL result in the normal flags. Only good for LT or GE
12989 // compares. Can be used for LE or GT compares by reversing arguments.
12990 // NOT GOOD FOR EQ/NE tests.
// Long compare vs zero for LT/GE only: the sign of the high word is the
// answer, so a single TEST of src.hi suffices. NOT valid for EQ/NE.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST   $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}
12999
13000 // Manifest a CmpL result in the normal flags. Only good for LT or GE
13001 // compares. Can be used for LE or GT compares by reversing arguments.
13002 // NOT GOOD FOR EQ/NE tests.
// Long compare reg-reg for LT/GE only: subtract with borrow through a temp so
// the final SF/OF reflect the full 64-bit difference. NOT valid for EQ/NE
// (ZF only reflects the high half).
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV    $tmp,$src1.hi\n\t"
            "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13013
13014 // Long compares reg < zero/req OR reg >= zero/req.
13015 // Just a wrapper for a normal branch, plus the predicate test.
// Branch on a long LT/GE compare result: just a jmpCon, but the predicate
// restricts matching to lt/ge tests (the LTGE flags are invalid for others).
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}
13024
13025 //======
13026 // Manifest a CmpUL result in the normal flags. Only good for LT or GE
13027 // compares. Can be used for LE or GT compares by reversing arguments.
13028 // NOT GOOD FOR EQ/NE tests.
// Unsigned long compare vs zero for LT/GE only; TEST of the high word, same
// encoding as the signed variant. NOT valid for EQ/NE.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST   $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}
13037
13038 // Manifest a CmpUL result in the normal flags. Only good for LT or GE
13039 // compares. Can be used for LE or GT compares by reversing arguments.
13040 // NOT GOOD FOR EQ/NE tests.
// Unsigned long compare reg-reg for LT/GE only: CMP low, SBB high through a
// temp; consumers test CF instead of SF/OF. NOT valid for EQ/NE.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV    $tmp,$src1.hi\n\t"
            "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
13051
13052 // Unsigned long compares reg < zero/req OR reg >= zero/req.
13053 // Just a wrapper for a normal branch, plus the predicate test.
// Branch on an unsigned long LT/GE compare result; predicate restricts to
// lt/ge tests, then defers to the generic conditional jump.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl);    // JLT or JGE...
  %}
%}
13062
13063 // Compare 2 longs and CMOVE longs.
// CMOVE a long (both halves) on a long LT/GE compare, register source.
// Requires CMOV support; predicate restricts to lt/ge tests.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13074
// CMOVE a long (both halves) on a long LT/GE compare, memory source.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
13085
// Unsigned-flags wrapper: same encoding as cmovLL_reg_LTGE, different flag
// register class (ulong LT/GE).
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp, flags, dst, src);
  %}
%}
13094
// Unsigned-flags wrapper for the memory-source long CMOVE.
instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp, flags, dst, src);
  %}
%}
13103
13104 // Compare 2 longs and CMOVE ints.
// CMOVE an int on a long LT/GE compare, register source.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13114
// CMOVE an int on a long LT/GE compare, memory source.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
13124
// Unsigned-flags wrapper for cmovII_reg_LTGE.
instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LTGE(cmp, flags, dst, src);
  %}
%}
13133
// Unsigned-flags wrapper for cmovII_mem_LTGE.
instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LTGE(cmp, flags, dst, src);
  %}
%}
13142
13143 // Compare 2 longs and CMOVE ptrs.
// CMOVE a pointer on a long LT/GE compare, register source.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13153
13154 // Compare 2 unsigned longs and CMOVE ptrs.
// Unsigned-flags wrapper for cmovPP_reg_LTGE.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LTGE(cmp,flags,dst,src);
  %}
%}
13163
13164 // Compare 2 longs and CMOVE doubles
// CMOVE a double (x87/FPR form) on a long LT/GE compare; only legal when
// doubles live on the FPU stack (UseSSE<=1).
// Fix: parenthesize the predicate. '&&' binds tighter than '||', so the
// original 'UseSSE<=1 && lt || ge' applied the UseSSE guard only to the 'lt'
// case and allowed this x87 form to match 'ge' tests regardless of UseSSE.
// Compare the correctly parenthesized predicates of the cmovII/cmovLL/cmovPP
// LTGE variants above.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13173
13174 // Compare 2 longs and CMOVE doubles
// CMOVE a double (XMM form) on a long LT/GE compare; requires SSE2 doubles
// (UseSSE>=2).
// Fix: parenthesize the predicate. '&&' binds tighter than '||', so the
// original 'UseSSE>=2 && lt || ge' applied the UseSSE guard only to the 'lt'
// case and allowed a match on 'ge' tests regardless of UseSSE. Matches the
// parenthesized form used by the integer/long cmov LTGE variants above.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13183
// CMOVE a float (x87/FPR form) on a long LT/GE compare; only legal when
// floats live on the FPU stack (UseSSE==0).
// Fix: parenthesize the predicate. '&&' binds tighter than '||', so the
// original 'UseSSE==0 && lt || ge' applied the UseSSE guard only to the 'lt'
// case and allowed this x87 form to match 'ge' tests even with SSE floats
// enabled. Matches the parenthesized form of the other cmov LTGE variants.
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13192
// CMOVE a float (XMM form) on a long LT/GE compare; requires SSE floats
// (UseSSE>=1).
// Fix: parenthesize the predicate. '&&' binds tighter than '||', so the
// original 'UseSSE>=1 && lt || ge' applied the UseSSE guard only to the 'lt'
// case and allowed a match on 'ge' tests regardless of UseSSE. Matches the
// parenthesized form of the other cmov LTGE variants.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13201
13202 //======
13203 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Long compare vs zero for EQ/NE only: OR the two halves into a temp; ZF is
// set iff both halves are zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV    $tmp,$src.lo\n\t"
            "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}
13213
13214 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Long compare reg-reg for EQ/NE only: compare the low halves, and only if
// they are equal compare the high halves (the JNE skips the second CMP, so
// ZF ends up reflecting full 64-bit equality).
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s  skip\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13225
13226 // Long compare reg == zero/reg OR reg != zero/reg
13227 // Just a wrapper for a normal branch, plus the predicate test.
// Branch on a long EQ/NE compare result; predicate restricts matching to
// eq/ne tests (the EQNE flags are invalid for ordering tests).
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}
13236
13237 //======
13238 // Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Unsigned long compare vs zero for EQ/NE only: OR both halves into a temp;
// identical encoding to the signed variant since equality is sign-agnostic.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV    $tmp,$src.lo\n\t"
            "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}
13248
13249 // Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Unsigned long compare reg-reg for EQ/NE only: compare low halves, skip the
// high-half compare if they already differ.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s  skip\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}
13260
13261 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13262 // Just a wrapper for a normal branch, plus the predicate test.
// Branch on an unsigned long EQ/NE compare result; predicate restricts to
// eq/ne tests, then defers to the generic conditional jump.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl);    // JEQ or JNE...
  %}
%}
13271
13272 // Compare 2 longs and CMOVE longs.
// CMOVE a long (both halves) on a long EQ/NE compare, register source.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13283
// CMOVE a long (both halves) on a long EQ/NE compare, memory source.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
13294
13295 // Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  // CMOV an int on the EQ/NE result of a long compare.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13305
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  // CMOV an int from memory on the EQ/NE result of a long compare.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
13315
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
  // Unsigned-flags wrapper: same EQ/NE int CMOV, expanded to the signed rule
  // (EQ/NE encoding is identical for signed and unsigned compares).
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_EQNE(cmp, flags, dst, src);
  %}
%}
13324
instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
  // Unsigned-flags wrapper for the memory-form EQ/NE int CMOV.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_EQNE(cmp, flags, dst, src);
  %}
%}
13333
13334 // Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  // CMOV a pointer on the EQ/NE result of a long compare.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13344
13345 // Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
  // Unsigned-flags wrapper: expands to the signed EQ/NE pointer CMOV.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_EQNE(cmp,flags,dst,src);
  %}
%}
13354
// Compare 2 longs and CMOVE doubles (x87 form, UseSSE<=1).
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  // FIX: parenthesize the EQ/NE disjunction. '&&' binds tighter than '||',
  // so the original predicate was (UseSSE<=1 && eq) || ne, letting an NE test
  // match even when UseSSE > 1 and overlap the SSE2 rule cmovDD_reg_EQNE.
  // Every other cmov predicate in this file uses the cond && ( A || B ) form.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13364
// Compare 2 longs and CMOVE doubles (SSE2 form, UseSSE>=2).
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  // FIX: parenthesize the EQ/NE disjunction ('&&' binds tighter than '||');
  // the original (UseSSE>=2 && eq) || ne matched an NE test even when
  // UseSSE < 2, overlapping the x87 rule cmovDDPR_reg_EQNE.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13374
// Compare 2 longs and CMOVE floats (x87 form, UseSSE==0).
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  // FIX: parenthesize the EQ/NE disjunction ('&&' binds tighter than '||');
  // the original (UseSSE==0 && eq) || ne matched an NE test even when
  // UseSSE > 0, overlapping the SSE rule cmovFF_reg_EQNE.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13383
// Compare 2 longs and CMOVE floats (SSE form, UseSSE>=1).
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  // FIX: parenthesize the EQ/NE disjunction ('&&' binds tighter than '||');
  // the original (UseSSE>=1 && eq) || ne matched an NE test even when
  // UseSSE == 0, overlapping the x87 rule cmovFFPR_reg_EQNE.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13392
13393 //======
13394 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13395 // Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  // tmp is zeroed then used as the left operand of the commuted compare.
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}
13406
13407 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13408 // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
13409 // requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  // tmp holds src2.hi so the SBB does not destroy src2.
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  // Note the swapped operand order (src2, src1) passed to the encoding.
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13420
13421 // Long compares reg < zero/req OR reg >= zero/req.
13422 // Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only for GT/LE tests; the commuted flag setup above makes these the
  // tests that can be answered from the swapped-operand compare.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}
13432
13433 //======
13434 // Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
13435 // Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  // tmp is zeroed then used as the left operand of the commuted compare.
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}
13446
13447 // Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
13448 // Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands
13449 // requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  // tmp holds src2.hi so the SBB does not destroy src2.
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  // Note the swapped operand order (src2, src1) passed to the encoding.
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
13460
13461 // Unsigned long compares reg < zero/req OR reg >= zero/req.
13462 // Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only for GT/LE tests, matching the commuted unsigned flag setup above.
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl); // JGT or JLE...
  %}
%}
13472
13473 // Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  // Requires CMOV hardware and an LE/GT test on the commuted long-compare flags.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  // A 64-bit conditional move is two 32-bit CMOVs: low word then high word.
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13484
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  // Requires CMOV hardware and an LE/GT test on the commuted long-compare flags.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
13495
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  // Unsigned-flags wrapper: expands to the signed LE/GT long CMOV.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LEGT(cmp, flags, dst, src);
  %}
%}
13504
instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  // Unsigned-flags wrapper for the memory-form LE/GT long CMOV.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LEGT(cmp, flags, dst, src);
  %}
%}
13513
13514 // Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  // CMOV an int on the LE/GT result of a commuted long compare.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13524
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  // CMOV an int from memory on the LE/GT result of a commuted long compare.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
13534
instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
  // Unsigned-flags wrapper: expands to the signed LE/GT int CMOV.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LEGT(cmp, flags, dst, src);
  %}
%}
13543
instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
  // Unsigned-flags wrapper for the memory-form LE/GT int CMOV.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LEGT(cmp, flags, dst, src);
  %}
%}
13552
13553 // Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  // CMOV a pointer on the LE/GT result of a commuted long compare.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13563
13564 // Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
  // Unsigned-flags wrapper: expands to the signed LE/GT pointer CMOV.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LEGT(cmp,flags,dst,src);
  %}
%}
13573
// Compare 2 longs and CMOVE doubles (x87 form, UseSSE<=1).
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  // FIX: parenthesize the LE/GT disjunction. '&&' binds tighter than '||',
  // so the original predicate was (UseSSE<=1 && le) || gt, letting a GT test
  // match even when UseSSE > 1 and overlap the SSE2 rule cmovDD_reg_LEGT.
  // Every other cmov predicate in this file uses the cond && ( A || B ) form.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13583
// Compare 2 longs and CMOVE doubles (SSE2 form, UseSSE>=2).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  // FIX: parenthesize the LE/GT disjunction ('&&' binds tighter than '||');
  // the original (UseSSE>=2 && le) || gt matched a GT test even when
  // UseSSE < 2, overlapping the x87 rule cmovDDPR_reg_LEGT.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13593
// Compare 2 longs and CMOVE floats (x87 form, UseSSE==0).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  // FIX: parenthesize the LE/GT disjunction ('&&' binds tighter than '||');
  // the original (UseSSE==0 && le) || gt matched a GT test even when
  // UseSSE > 0, overlapping the SSE rule cmovFF_reg_LEGT.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13602
13603
// Compare 2 longs and CMOVE floats (SSE form, UseSSE>=1).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  // FIX: parenthesize the LE/GT disjunction ('&&' binds tighter than '||');
  // the original (UseSSE>=1 && le) || gt matched a GT test even when
  // UseSSE == 0, overlapping the x87 rule cmovFFPR_reg_LEGT.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13612
13613
13614 // ============================================================================
13615 // Procedure Call/Return Instructions
13616 // Call Java Static Instruction
13617 // Note: If this code changes, the corresponding ret_addr_offset() and
13618 // compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  // Encoding classes (defined elsewhere in this file): pre-call reset,
  // the direct call itself, the call epilog, then FPU state restore.
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  // Alignment interacts with ret_addr_offset()/compute_padding() (see the
  // note above this instruct).
  ins_alignment(4);
%}
13633
13634 // Call Java Dynamic Instruction
13635 // Note: If this code changes, the corresponding ret_addr_offset() and
13636 // compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // EAX is pre-loaded with a placeholder inline-cache oop before the call.
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  // Alignment interacts with ret_addr_offset()/compute_padding() (see the
  // note above this instruct).
  ins_alignment(4);
%}
13652
13653 // Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}
13668
13669 // Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Like CallRuntimeDirect, but additionally verifies FPU state on return
  // (Verify_FPU_For_Leaf) since leaf calls take no safepoint.
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}
13683
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  // No-FP leaf call: skips the float-stack clearing and FPU restore that
  // the other runtime-call instructs emit.
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}
13694
13695
13696 // Return Instruction
13697 // Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  // Single-byte RET (0xC3): pops the return address and jumps to it.
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}
13705
13706 // Tail Call; Jump from runtime stub to Java code.
13707 // Also known as an 'interprocedural jump'.
13708 // Target of jump will eventually return to caller.
13709 // TailJump below removes the return address.
13710 // Don't use ebp for 'jump_target' because a MachEpilogNode has already been
13711 // emitted just above the TailCall which has reset ebp to the caller state.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
  match(TailCall jump_target method_ptr);
  ins_cost(300);
  // method_ptr is pinned to EBX (eBXRegP) so the callee can find the Method*.
  format %{ "JMP $jump_target \t# EBX holds method" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}
13720
13721
13722 // Tail Jump; remove the return address; jump to target.
13723 // TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // Pops the (dead) return address into EDX, then jumps indirectly;
  // the exception oop is pinned to EAX (eAXRegP).
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}
13734
13735 // Create exception oop: created by stack-crawling runtime code.
13736 // Created exception is now available to this handler, and is setup
13737 // just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  // Zero-size: the exception oop is already in EAX when control arrives here.
  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}
13748
13749
13750 // Rethrow exception:
13751 // The exception oop will come in the first argument position.
13752 // Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  // Jumps (not calls) to the shared rethrow stub; encoding defined elsewhere.
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}
13762
13763 // inlined locking and unlocking
13764
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
  // RTM (transactional) fast-lock variant; selected only when RTM is enabled.
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    // 32-bit has no dedicated thread register: load it into a TEMP first.
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
                 _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}
13781
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
  // Non-RTM, non-lightweight fast lock (stack-locking / monitor path).
  predicate(LockingMode != LM_LIGHTWEIGHT && !Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    // 32-bit has no dedicated thread register: load it into a TEMP first.
    __ get_thread($thread$$Register);
    // RTM-related arguments are all disabled (noreg / nullptr / false).
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, $thread$$Register, nullptr, nullptr, nullptr, false, false);
  %}
  ins_pipe(pipe_slow);
%}
13795
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  // Non-lightweight unlock; box (lock record) is pinned to EAX and killed.
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}
13807
instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
  // Lightweight-locking fast lock; eax_reg is pinned to EAX for the CAS result.
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
  ins_encode %{
    // 32-bit has no dedicated thread register: load it into a TEMP first.
    __ get_thread($thread$$Register);
    __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}
13820
instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
  // Lightweight-locking unlock; the box input is pinned to EAX and killed.
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object eax_reg));
  effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
  ins_encode %{
    // 32-bit has no dedicated thread register: load it into a TEMP first.
    __ get_thread($thread$$Register);
    __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}
13833
instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  // MaskAll for vector lengths up to 32 elements.
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  // NOTE(review): the format text says "LE32" while the instruct name says
  // "LT32"; the predicate is '<= 32', so name and format disagree — confirm
  // which spelling is intended.
  format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
13844
instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  // MaskAll for vector lengths greater than 32; needs an extra mask TEMP.
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
13856
instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  // Int-source MaskAll for vector lengths greater than 32; same encoding
  // helper as the long-source variant above.
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
13868
13869 // ============================================================================
13870 // Safepoint Instruction
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    // Record a poll relocation, then emit TEST EAX,[poll]; the guarantee
    // checks the first emitted byte is the expected TEST r32,r/m32 opcode.
    // NOTE(review): post_pc is captured but never used — consider removing.
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
13888
13889
13890 // ============================================================================
13891 // This name is KNOWN by the ADLC and cannot be changed.
13892 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13893 // for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  // cr is declared killed: get_thread presumably clobbers flags — the
  // effect below is the authoritative statement.
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}
13905
13906
13907
13908 //----------PEEPHOLE RULES-----------------------------------------------------
13909 // These must follow all instruction definitions as they use the names
13910 // defined in the instructions definitions.
13911 //
13912 // peepmatch ( root_instr_name [preceding_instruction]* );
13913 //
13914 // peepconstraint %{
13915 // (instruction_number.operand_name relational_op instruction_number.operand_name
13916 // [, ...] );
13917 // // instruction numbers are zero-based using left to right order in peepmatch
13918 //
13919 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
13920 // // provide an instruction_number.operand_name for each operand that appears
13921 // // in the replacement instruction's match rule
13922 //
13923 // ---------VM FLAGS---------------------------------------------------------
13924 //
13925 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13926 //
13927 // Each peephole rule is given an identifying number starting with zero and
13928 // increasing by one in the order seen by the parser. An individual peephole
13929 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13930 // on the command-line.
13931 //
13932 // ---------CURRENT LIMITATIONS----------------------------------------------
13933 //
13934 // Only match adjacent instructions in same basic block
13935 // Only equality constraints
13936 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13937 // Only one replacement instruction
13938 //
13939 // ---------EXAMPLE----------------------------------------------------------
13940 //
13941 // // pertinent parts of existing instructions in architecture description
13942 // instruct movI(rRegI dst, rRegI src) %{
13943 // match(Set dst (CopyI src));
13944 // %}
13945 //
13946 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13947 // match(Set dst (AddI dst src));
13948 // effect(KILL cr);
13949 // %}
13950 //
13951 // // Change (inc mov) to lea
13952 // peephole %{
13953 // // increment preceded by register-register move
13954 // peepmatch ( incI_eReg movI );
13955 // // require that the destination register of the increment
13956 // // match the destination register of the move
13957 // peepconstraint ( 0.dst == 1.dst );
13958 // // construct a replacement instruction that sets
13959 // // the destination to ( move's source register + one )
13960 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13961 // %}
13962 //
13963 // Implementation no longer uses movX instructions since
13964 // machine-independent system no longer uses CopyX nodes.
13965 //
13966 // peephole %{
13967 // peepmatch ( incI_eReg movI );
13968 // peepconstraint ( 0.dst == 1.dst );
13969 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13970 // %}
13971 //
13972 // peephole %{
13973 // peepmatch ( decI_eReg movI );
13974 // peepconstraint ( 0.dst == 1.dst );
13975 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13976 // %}
13977 //
13978 // peephole %{
13979 // peepmatch ( addI_eReg_imm movI );
13980 // peepconstraint ( 0.dst == 1.dst );
13981 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13982 // %}
13983 //
13984 // peephole %{
13985 // peepmatch ( addP_eReg_imm movP );
13986 // peepconstraint ( 0.dst == 1.dst );
13987 // peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13988 // %}
13989
13990 // // Change load of spilled value to only a spill
13991 // instruct storeI(memory mem, rRegI src) %{
13992 // match(Set mem (StoreI mem src));
13993 // %}
13994 //
13995 // instruct loadI(rRegI dst, memory mem) %{
13996 // match(Set dst (LoadI mem));
13997 // %}
13998 //
peephole %{
  // Change a load of a just-spilled value into only the spill: when the
  // loadI reads the same memory slot the preceding storeI wrote, and into
  // the same register, replace the pair with the store alone.
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}
14004
14005 //----------SMARTSPILL RULES---------------------------------------------------
14006 // These must follow all instruction definitions as they use the names
14007 // defined in the instructions definitions.