1 //
2 // Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
// ideal register type, encoding );
// Register Save Types:
//
// NS = No-Save: The register allocator assumes that these registers
// can be used without saving upon entry to the method, &
// that they do not need to be saved at call sites.
//
// SOC = Save-On-Call: The register allocator assumes that these registers
// can be used without saving upon entry to the method,
// but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
// must be saved before using them upon entry to the
// method, but they do not need to be saved at call
// sites.
//
// AS = Always-Save: The register allocator assumes that these registers
// must be saved before using them upon entry to the
// method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

// The 4th argument (encoding) is the x86 hardware register number
// placed into the opcode bytes (EAX=0, ECX=1, EDX=2, EBX=3, ...).
reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// ESP is the stack pointer: never allocated and never saved (NS/NS).
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());
76
// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
// Each x87 slot is described as two 32-bit halves (L/H) so a double can
// occupy an adjacent, even-aligned pair (see fp_dbl_reg classes below).
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
128
129
//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Many classes below come as a *_with_ebp / *_no_ebp pair plus a
// reg_class_dynamic that selects between them at runtime based on
// PreserveFramePointer: EBP is removed from the allocatable set when it is
// reserved as the frame pointer.
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
reg_class ebpd_reg( EBP,EDI );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}
241
242
//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}
250
source %{
// Relocation formats used when emitting 32-bit immediates / displacements.
#define RELOC_IMM32 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand

#define __ masm->

// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
#define HIGH_FROM_LOW_ENC(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// No register masks need runtime initialization on x86_32.
void reg_mask_init() {}
266
267 // Note: 'double' and 'long long' have 32-bits alignment on x86.
268 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
269 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
270 // of 128-bits operands for SSE instructions.
271 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
272 // Store the value to a 128-bits operand.
273 operand[0] = lo;
274 operand[1] = hi;
275 return operand;
276 }
277
// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pointer is carved 16-byte aligned out of fp_signmask_pool by
// double_quadword(); the pool's extra 128 bits absorb the alignment slop.
static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
286
287 // Offset hacking within calls.
288 static int pre_call_resets_size() {
289 int size = 0;
290 Compile* C = Compile::current();
291 if (C->in_24_bit_fp_mode()) {
292 size += 6; // fldcw
293 }
294 if (VM_Version::supports_vzeroupper()) {
295 size += 3; // vzeroupper
296 }
297 return size;
298 }
299
300 // !!!!! Special hack to get all type of calls to specify the byte offset
301 // from the start of the call to the point where the return address
302 // will point.
303 int MachCallStaticJavaNode::ret_addr_offset() {
304 return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
305 }
306
307 int MachCallDynamicJavaNode::ret_addr_offset() {
308 return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
309 }
310
// Size in bytes of the FFree_Float_Stack_All sequence; -1 until that code
// has been emitted (presumably set where the sequence is generated,
// elsewhere in this file — see the assert below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  // 5-byte CALL, plus pre-call resets, plus the float-stack flush unless
  // this is a leaf call that uses no FP.
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}
317
318 //
319 // Compute padding required for nodes which need alignment
320 //
321
322 // The address of the call instruction needs to be 4-byte aligned to
323 // ensure that it does not span a cache line so that it can be patched.
324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
325 current_offset += pre_call_resets_size(); // skip fldcw, if any
326 current_offset += 1; // skip call opcode byte
327 return align_up(current_offset, alignment_required()) - current_offset;
328 }
329
330 // The address of the call instruction needs to be 4-byte aligned to
331 // ensure that it does not span a cache line so that it can be patched.
332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
333 current_offset += pre_call_resets_size(); // skip fldcw, if any
334 current_offset += 5; // skip MOV instruction
335 current_offset += 1; // skip call opcode byte
336 return align_up(current_offset, alignment_required()) - current_offset;
337 }
338
339 // EMIT_RM()
340 void emit_rm(C2_MacroAssembler *masm, int f1, int f2, int f3) {
341 unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
342 __ emit_int8(c);
343 }
344
345 // EMIT_CC()
346 void emit_cc(C2_MacroAssembler *masm, int f1, int f2) {
347 unsigned char c = (unsigned char)( f1 | f2 );
348 __ emit_int8(c);
349 }
350
351 // EMIT_OPCODE()
352 void emit_opcode(C2_MacroAssembler *masm, int code) {
353 __ emit_int8((unsigned char) code);
354 }
355
// EMIT_OPCODE() w/ relocation information
// Record a relocation 'offset' bytes past the current instruction mark,
// then emit the opcode byte itself.
void emit_opcode(C2_MacroAssembler *masm, int code, relocInfo::relocType reloc, int offset = 0) {
  __ relocate(__ inst_mark() + offset, reloc);
  emit_opcode(masm, code);
}
361
362 // EMIT_D8()
363 void emit_d8(C2_MacroAssembler *masm, int d8) {
364 __ emit_int8((unsigned char) d8);
365 }
366
367 // EMIT_D16()
368 void emit_d16(C2_MacroAssembler *masm, int d16) {
369 __ emit_int16(d16);
370 }
371
372 // EMIT_D32()
373 void emit_d32(C2_MacroAssembler *masm, int d32) {
374 __ emit_int32(d32);
375 }
376
// emit 32 bit value and construct relocation entry from relocInfo::relocType
// The relocation is recorded at the instruction mark so it covers the four
// bytes emitted immediately after.
void emit_d32_reloc(C2_MacroAssembler *masm, int d32, relocInfo::relocType reloc,
        int format) {
  __ relocate(__ inst_mark(), reloc, format);
  __ emit_int32(d32);
}
383
// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(C2_MacroAssembler *masm, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // An embedded oop must be a valid oop, null, or the non-oop placeholder.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  // Record the relocation at the instruction mark, then emit the 4 bytes.
  __ relocate(__ inst_mark(), rspec, format);
  __ emit_int32(d32);
}
395
396 // Access stack slot for load or store
397 void store_to_stackslot(C2_MacroAssembler *masm, int opcode, int rm_field, int disp) {
398 emit_opcode( masm, opcode ); // (e.g., FILD [ESP+src])
399 if( -128 <= disp && disp <= 127 ) {
400 emit_rm( masm, 0x01, rm_field, ESP_enc ); // R/M byte
401 emit_rm( masm, 0x00, ESP_enc, ESP_enc); // SIB byte
402 emit_d8 (masm, disp); // Displacement // R/M byte
403 } else {
404 emit_rm( masm, 0x02, rm_field, ESP_enc ); // R/M byte
405 emit_rm( masm, 0x00, ESP_enc, ESP_enc); // SIB byte
406 emit_d32(masm, disp); // Displacement // R/M byte
407 }
408 }
409
// rRegI ereg, memory mem) %{ // emit_reg_mem
// Emit the ModR/M byte (plus SIB and displacement bytes as needed) for a
// register + memory operand. Conventions visible in this code:
//   index == 0x4  means "no index register" (forces the no-SIB form when
//                 scale == 0 and the base is not ESP);
//   base  == -1   flags an absolute 32-bit address (mod=00, r/m=101);
//   disp_reloc != relocInfo::none forces a 32-bit relocated displacement.
void encode_RegMem( C2_MacroAssembler *masm, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(masm, 0x0, reg_encoding, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(masm, 0x1, reg_encoding, base);
        emit_d8(masm, displace);
      }
      else { // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(masm, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(masm, displace, disp_reloc, 1);
          } else {
            emit_d32 (masm, displace);
          }
        }
        else { // Normal base + offset
          emit_rm(masm, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(masm, displace, disp_reloc, 1);
          } else {
            emit_d32 (masm, displace);
          }
        }
      }
    }
  }
  else { // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) { // If no displacement
      emit_rm(masm, 0x0, reg_encoding, 0x4);
      emit_rm(masm, scale, index, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(masm, 0x1, reg_encoding, 0x4);
        emit_rm(masm, scale, index, base);
        emit_d8(masm, displace);
      }
      else { // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(masm, 0x2, reg_encoding, 0x4);
          emit_rm(masm, scale, index, 0x04);
        } else {
          emit_rm(masm, 0x2, reg_encoding, 0x4);
          emit_rm(masm, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(masm, displace, disp_reloc, 1);
        } else {
          emit_d32 (masm, displace);
        }
      }
    }
  }
}
476
477
478 void encode_Copy( C2_MacroAssembler *masm, int dst_encoding, int src_encoding ) {
479 if( dst_encoding == src_encoding ) {
480 // reg-reg copy, use an empty encoding
481 } else {
482 emit_opcode( masm, 0x8B );
483 emit_rm(masm, 0x3, dst_encoding, src_encoding );
484 }
485 }
486
// Post-compare flag fixup for comiss/ucomiss so that an unordered (NaN)
// result reads as 'less than' to subsequent branches.
void emit_cmpfp_fixup(MacroAssembler* masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit); // PF clear => ordered result, nothing to fix
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  // 7 6 5 4 3 2 1 0
  // |S|Z|r|A|r|P|r|C| (r - reserved bit)
  // 0 0 1 0 1 0 1 1 (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}
506
// Materialize a three-way FP compare result in 'dst':
// -1 for less-than or unordered (NaN), 0 for equal, +1 for greater-than.
static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  Label done;
  __ movl(dst, -1);                  // assume less / unordered
  __ jcc(Assembler::parity, done);   // unordered (NaN): keep -1
  __ jcc(Assembler::below, done);    // less: keep -1
  __ setb(Assembler::notEqual, dst); // 0 if equal, 1 if greater
  __ movzbl(dst, dst);               // zero-extend the byte result
  __ bind(done);
}
516
517
518 //=============================================================================
519 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
520
521 int ConstantTable::calculate_table_base_offset() const {
522 return 0; // absolute addressing, no offset
523 }
524
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
// Never called: requires_postalloc_expand() returns false on this platform.
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}
529
void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

// Size matches the empty encoding above: zero bytes.
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}
537
#ifndef PRODUCT
// Debug-only listing output; this node emits no code.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif
543
544
545 //=============================================================================
546 #ifndef PRODUCT
547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
548 Compile* C = ra_->C;
549
550 int framesize = C->output()->frame_size_in_bytes();
551 int bangsize = C->output()->bang_size_in_bytes();
552 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
553 // Remove wordSize for return addr which is already pushed.
554 framesize -= wordSize;
555
556 if (C->output()->need_stack_bang(bangsize)) {
557 framesize -= wordSize;
558 st->print("# stack bang (%d bytes)", bangsize);
559 st->print("\n\t");
560 st->print("PUSH EBP\t# Save EBP");
561 if (PreserveFramePointer) {
562 st->print("\n\t");
563 st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
564 }
565 if (framesize) {
566 st->print("\n\t");
567 st->print("SUB ESP, #%d\t# Create frame",framesize);
568 }
569 } else {
570 st->print("SUB ESP, #%d\t# Create frame",framesize);
571 st->print("\n\t");
572 framesize -= wordSize;
573 st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
574 if (PreserveFramePointer) {
575 st->print("\n\t");
576 st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
577 if (framesize > 0) {
578 st->print("\n\t");
579 st->print("ADD EBP, #%d", framesize);
580 }
581 }
582 }
583
584 if (VerifyStackAtCalls) {
585 st->print("\n\t");
586 framesize -= wordSize;
587 st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
588 }
589
590 if( C->in_24_bit_fp_mode() ) {
591 st->print("\n\t");
592 st->print("FLDCW \t# load 24 bit fpu control word");
593 }
594 if (UseSSE >= 2 && VerifyFPU) {
595 st->print("\n\t");
596 st->print("# verify FPU stack (must be clean on entry)");
597 }
598
599 #ifdef ASSERT
600 if (VerifyStackAtCalls) {
601 st->print("\n\t");
602 st->print("# stack alignment check");
603 }
604 #endif
605 st->cr();
606 }
607 #endif
608
609
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  // verified_entry() lays down the actual frame-building code.
  __ verified_entry(C);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
624
// Prolog size is data-dependent, so defer to the generic size computation.
uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}
632
633 //=============================================================================
634 #ifndef PRODUCT
635 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
636 Compile *C = ra_->C;
637 int framesize = C->output()->frame_size_in_bytes();
638 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
639 // Remove two words for return addr and rbp,
640 framesize -= 2*wordSize;
641
642 if (C->max_vector_size() > 16) {
643 st->print("VZEROUPPER");
644 st->cr(); st->print("\t");
645 }
646 if (C->in_24_bit_fp_mode()) {
647 st->print("FLDCW standard control word");
648 st->cr(); st->print("\t");
649 }
650 if (framesize) {
651 st->print("ADD ESP,%d\t# Destroy frame",framesize);
652 st->cr(); st->print("\t");
653 }
654 st->print_cr("POPL EBP"); st->print("\t");
655 if (do_polling() && C->is_method_compilation()) {
656 st->print("CMPL rsp, poll_offset[thread] \n\t"
657 "JA #safepoint_stub\t"
658 "# Safepoint: poll for GC");
659 }
660 }
661 #endif
662
void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(masm, 0x81); // add SP, #framesize (imm32 form)
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d32(masm, framesize);
  } else if (framesize) {
    emit_opcode(masm, 0x83); // add SP, #framesize (imm8 form)
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, framesize);
  }

  emit_opcode(masm, 0x58 | EBP_enc); // POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-path safepoint poll; the stub is the slow path taken when a
    // safepoint is pending (stub creation is skipped while only sizing).
    Register thread = as_Register(EBX_enc);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ set_inst_mark();
    __ relocate(relocInfo::poll_return_type);
    __ clear_inst_mark();
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}
715
// Epilog size is data-dependent, so defer to the generic size computation.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
728
729 //=============================================================================
730
731 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
732 static enum RC rc_class( OptoReg::Name reg ) {
733
734 if( !OptoReg::is_valid(reg) ) return rc_bad;
735 if (OptoReg::is_stack(reg)) return rc_stack;
736
737 VMReg r = OptoReg::as_VMReg(reg);
738 if (r->is_Register()) return rc_int;
739 if (r->is_FloatRegister()) {
740 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
741 return rc_float;
742 }
743 if (r->is_KRegister()) return rc_kreg;
744 assert(r->is_XMMRegister(), "must be");
745 return rc_xmm;
746 }
747
// Spill-copy helper for a GPR or x87 register to/from [ESP+offset].
// Three modes: with 'masm' it emits "<opcode> reg,[ESP+offset]"; with
// do_size it only accounts size; otherwise (debug) it prints the
// instruction. Returns 'size' plus this instruction's length in bytes.
static int impl_helper( C2_MacroAssembler *masm, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( masm ) {
    masm->set_inst_mark();
    emit_opcode (masm, opcode );
    encode_RegMem(masm, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
    masm->clear_inst_mark();
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  // opcode (1) + ModR/M (1) + SIB (1) + 0/1/4 displacement bytes.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}
769
// Helper for XMM registers. Extra opcode bits, limited syntax.
// Spill/fill an XMM register to/from [ESP+offset]; a double is modeled as
// two adjacent 32-bit halves (reg_hi == reg_lo+1). Same emit/size/print
// modes as impl_helper; returns accumulated size in bytes.
static int impl_x_helper( C2_MacroAssembler *masm, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (masm) {
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    __ set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Size accounting: EVEX may compress the displacement to a single byte.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}
827
828
// XMM-to-XMM register move.  An adjacent register pair (lo+1 == hi) denotes a
// 64-bit (double) value, otherwise a 32-bit (float) value.  Returns the
// accumulated code size in bytes.
static int impl_movx_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (masm) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    // Formatting path: the mnemonic printed depends on the same flag the
    // assembler uses to pick the actual encoding.
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}
866
867 static int impl_movgpr2x_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
868 int src_hi, int dst_hi, int size, outputStream* st ) {
869 // 32-bit
870 if (masm) {
871 // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
872 __ set_managed();
873 __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
874 as_Register(Matcher::_regEncode[src_lo]));
875 #ifndef PRODUCT
876 } else if (!do_size) {
877 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
878 #endif
879 }
880 return (UseAVX> 2) ? 6 : 4;
881 }
882
883
884 static int impl_movx2gpr_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
885 int src_hi, int dst_hi, int size, outputStream* st ) {
886 // 32-bit
887 if (masm) {
888 // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
889 __ set_managed();
890 __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
891 as_XMMRegister(Matcher::_regEncode[src_lo]));
892 #ifndef PRODUCT
893 } else if (!do_size) {
894 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
895 #endif
896 }
897 return (UseAVX> 2) ? 6 : 4;
898 }
899
900 static int impl_mov_helper( C2_MacroAssembler *masm, bool do_size, int src, int dst, int size, outputStream* st ) {
901 if( masm ) {
902 emit_opcode(masm, 0x8B );
903 emit_rm (masm, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
904 #ifndef PRODUCT
905 } else if( !do_size ) {
906 if( size != 0 ) st->print("\n\t");
907 st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
908 #endif
909 }
910 return size+2;
911 }
912
// Store an x87 FP-stack value to a stack slot.  If the source is not already
// on top of the FP stack it is pushed first (FLD) and stored with a popping
// store (FSTP); a value already in ST(0) uses a non-popping FST so the FP
// stack is left unchanged.  Returns the accumulated code size in bytes.
static int impl_fp_store_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( masm ) {
      emit_opcode( masm, 0xD9 ); // FLD (i.e., push it)
      emit_d8( masm, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // EBX_num/EDX_num are used only for their register encodings (3 and 2),
  // which become the /r opcode-extension field of the store instruction:
  // DD|D9 /3 is FSTP (store & pop), DD|D9 /2 is FST (store, no pop).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(masm,do_size,false,offset,st_op,op,op_str,size, st);
}
942
943 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
944 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
945 int src_hi, int dst_hi, uint ireg, outputStream* st);
946
947 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
948 int stack_offset, int reg, uint ireg, outputStream* st);
949
950 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
951 int dst_offset, uint ireg, outputStream* st) {
952 if (masm) {
953 switch (ireg) {
954 case Op_VecS:
955 __ pushl(Address(rsp, src_offset));
956 __ popl (Address(rsp, dst_offset));
957 break;
958 case Op_VecD:
959 __ pushl(Address(rsp, src_offset));
960 __ popl (Address(rsp, dst_offset));
961 __ pushl(Address(rsp, src_offset+4));
962 __ popl (Address(rsp, dst_offset+4));
963 break;
964 case Op_VecX:
965 __ movdqu(Address(rsp, -16), xmm0);
966 __ movdqu(xmm0, Address(rsp, src_offset));
967 __ movdqu(Address(rsp, dst_offset), xmm0);
968 __ movdqu(xmm0, Address(rsp, -16));
969 break;
970 case Op_VecY:
971 __ vmovdqu(Address(rsp, -32), xmm0);
972 __ vmovdqu(xmm0, Address(rsp, src_offset));
973 __ vmovdqu(Address(rsp, dst_offset), xmm0);
974 __ vmovdqu(xmm0, Address(rsp, -32));
975 break;
976 case Op_VecZ:
977 __ evmovdquq(Address(rsp, -64), xmm0, 2);
978 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
979 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
980 __ evmovdquq(xmm0, Address(rsp, -64), 2);
981 break;
982 default:
983 ShouldNotReachHere();
984 }
985 #ifndef PRODUCT
986 } else {
987 switch (ireg) {
988 case Op_VecS:
989 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
990 "popl [rsp + #%d]",
991 src_offset, dst_offset);
992 break;
993 case Op_VecD:
994 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
995 "popq [rsp + #%d]\n\t"
996 "pushl [rsp + #%d]\n\t"
997 "popq [rsp + #%d]",
998 src_offset, dst_offset, src_offset+4, dst_offset+4);
999 break;
1000 case Op_VecX:
1001 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1002 "movdqu xmm0, [rsp + #%d]\n\t"
1003 "movdqu [rsp + #%d], xmm0\n\t"
1004 "movdqu xmm0, [rsp - #16]",
1005 src_offset, dst_offset);
1006 break;
1007 case Op_VecY:
1008 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1009 "vmovdqu xmm0, [rsp + #%d]\n\t"
1010 "vmovdqu [rsp + #%d], xmm0\n\t"
1011 "vmovdqu xmm0, [rsp - #32]",
1012 src_offset, dst_offset);
1013 break;
1014 case Op_VecZ:
1015 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1016 "vmovdqu xmm0, [rsp + #%d]\n\t"
1017 "vmovdqu [rsp + #%d], xmm0\n\t"
1018 "vmovdqu xmm0, [rsp - #64]",
1019 src_offset, dst_offset);
1020 break;
1021 default:
1022 ShouldNotReachHere();
1023 }
1024 #endif
1025 }
1026 }
1027
// Shared worker behind MachSpillCopyNode::format/emit/size.  With a non-null
// 'masm' it emits the copy instructions; with a null 'masm' and !do_size it
// prints an assembly listing to 'st'.  Returns the accumulated code size in
// bytes for the byte-emitting paths; the vector and opmask paths, which emit
// only through the MacroAssembler, return 0.
uint MachSpillCopyNode::implementation( C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size; // Self copy, no move

  // Vector spill copies (excluding vectmask, which spills as an opmask
  // register below) are delegated entirely to the vec_* helpers.
  if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(masm,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( src_first_rc == rc_stack && dst_first_rc == rc_int )
    size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( masm ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode (masm, 0xD9 );           // FLD    ST(i)
        emit_d8     (masm, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode (masm, 0xDD );           // FSTP   ST(i)
        emit_d8     (masm, 0xD8+Matcher::_regEncode[dst_first] );
     } else {
        emit_opcode (masm, 0xDD );           // FST    ST(i)
        emit_d8     (masm, 0xD0+Matcher::_regEncode[dst_first]-1 );
     }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( masm ) {
      masm->set_inst_mark();
      emit_opcode  (masm, op );
      encode_RegMem(masm, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (masm, 0xDD );            // FSTP   ST(i)
      emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
      masm->clear_inst_mark();
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(masm,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
    return impl_x_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( masm ) {
      emit_opcode(masm,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(masm, 0x1, ESP_enc, 0x04);
      emit_rm(masm, 0x0, 0x04, ESP_enc);
      emit_d8(masm,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP-8]");
#endif
    }
    size += 4;

    // Bounce through the temp slot just allocated: x87 store, then xmm load.
    size = impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(masm,do_size,true ,0,dst_first, dst_second, size, st);

    if( masm ) {
      emit_opcode(masm,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(masm, 0x1, ESP_enc, 0x04);
      emit_rm(masm, 0x0, 0x04, ESP_enc);
      emit_d8(masm,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // AVX-512 opmask specific spilling.
  if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    int offset = ra_->reg2offset(src_first);
    if (masm != nullptr) {
      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
    } else {
      st->print("KMOV %s, [ESP + %d]", Matcher::regName[dst_first], offset);
#endif
    }
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    int offset = ra_->reg2offset(dst_first);
    if (masm != nullptr) {
      __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else {
      st->print("KMOV [ESP + %d], %s", offset, Matcher::regName[src_first]);
#endif
    }
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    if (masm != nullptr) {
      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else {
      st->print("KMOV %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
#endif
    }
    return 0;
  }

  // Every remaining case handles only the second (high) word of a pair;
  // the first word must have been moved by one of the branches above.
  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size; // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(masm,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  Unimplemented();
  return 0; // Mute compiler
}
1312
1313 #ifndef PRODUCT
1314 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1315 implementation( nullptr, ra_, false, st );
1316 }
1317 #endif
1318
1319 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1320 implementation( masm, ra_, false, nullptr );
1321 }
1322
1323 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1324 return MachNode::size(ra_);
1325 }
1326
1327
1328 //=============================================================================
1329 #ifndef PRODUCT
1330 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1331 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1332 int reg = ra_->get_reg_first(this);
1333 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
1334 }
1335 #endif
1336
1337 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1338 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1339 int reg = ra_->get_encode(this);
1340 if( offset >= 128 ) {
1341 emit_opcode(masm, 0x8D); // LEA reg,[SP+offset]
1342 emit_rm(masm, 0x2, reg, 0x04);
1343 emit_rm(masm, 0x0, 0x04, ESP_enc);
1344 emit_d32(masm, offset);
1345 }
1346 else {
1347 emit_opcode(masm, 0x8D); // LEA reg,[SP+offset]
1348 emit_rm(masm, 0x1, reg, 0x04);
1349 emit_rm(masm, 0x0, 0x04, ESP_enc);
1350 emit_d8(masm, offset);
1351 }
1352 }
1353
1354 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1355 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1356 if( offset >= 128 ) {
1357 return 7;
1358 }
1359 else {
1360 return 4;
1361 }
1362 }
1363
1364 //=============================================================================
1365 #ifndef PRODUCT
1366 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1367 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
1368 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
1369 st->print_cr("\tNOP");
1370 st->print_cr("\tNOP");
1371 if( !OptoBreakpoint )
1372 st->print_cr("\tNOP");
1373 }
1374 #endif
1375
1376 void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1377 __ ic_check(CodeEntryAlignment);
1378 }
1379
1380 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1381 return MachNode::size(ra_); // too many variables; just compute it
1382 // the hard way
1383 }
1384
1385
1386 //=============================================================================
1387
1388 // Vector calling convention not supported.
1389 bool Matcher::supports_vector_calling_convention() {
1390 return false;
1391 }
1392
1393 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1394 Unimplemented();
1395 return OptoRegPair(0, 0);
1396 }
1397
1398 // Is this branch offset short enough that a short branch can be used?
1399 //
1400 // NOTE: If the platform does not provide any short branch variants, then
1401 // this method should return false for offset 0.
1402 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1403 // The passed offset is relative to address of the branch.
1404 // On 86 a branch displacement is calculated relative to address
1405 // of a next instruction.
1406 offset -= br_size;
1407
1408 // the short version of jmpConUCF2 contains multiple branches,
1409 // making the reach slightly less
1410 if (rule == jmpConUCF2_rule)
1411 return (-126 <= offset && offset <= 125);
1412 return (-128 <= offset && offset <= 127);
1413 }
1414
1415 // Return whether or not this register is ever used as an argument. This
1416 // function is used on startup to build the trampoline stubs in generateOptoStub.
1417 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1419 bool Matcher::can_be_java_arg( int reg ) {
1420 if( reg == ECX_num || reg == EDX_num ) return true;
1421 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
1422 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1423 return false;
1424 }
1425
1426 bool Matcher::is_spillable_arg( int reg ) {
1427 return can_be_java_arg(reg);
1428 }
1429
1430 uint Matcher::int_pressure_limit()
1431 {
1432 return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
1433 }
1434
1435 uint Matcher::float_pressure_limit()
1436 {
1437 return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
1438 }
1439
1440 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1441 // Use hardware integer DIV instruction when
1442 // it is faster than a code which use multiply.
1443 // Only when constant divisor fits into 32 bit
1444 // (min_jint is excluded to get only correct
1445 // positive 32 bit values from negative).
1446 return VM_Version::has_fast_idiv() &&
1447 (divisor == (int)divisor && divisor != min_jint);
1448 }
1449
1450 // Register for DIVI projection of divmodI
1451 RegMask Matcher::divI_proj_mask() {
1452 return EAX_REG_mask();
1453 }
1454
1455 // Register for MODI projection of divmodI
1456 RegMask Matcher::modI_proj_mask() {
1457 return EDX_REG_mask();
1458 }
1459
1460 // Register for DIVL projection of divmodL
1461 RegMask Matcher::divL_proj_mask() {
1462 ShouldNotReachHere();
1463 return RegMask();
1464 }
1465
1466 // Register for MODL projection of divmodL
1467 RegMask Matcher::modL_proj_mask() {
1468 ShouldNotReachHere();
1469 return RegMask();
1470 }
1471
1472 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1473 return NO_REG_mask();
1474 }
1475
1476 // Returns true if the high 32 bits of the value is known to be zero.
1477 bool is_operand_hi32_zero(Node* n) {
1478 int opc = n->Opcode();
1479 if (opc == Op_AndL) {
1480 Node* o2 = n->in(2);
1481 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1482 return true;
1483 }
1484 }
1485 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1486 return true;
1487 }
1488 return false;
1489 }
1490
1491 %}
1492
1493 //----------ENCODING BLOCK-----------------------------------------------------
1494 // This block specifies the encoding classes used by the compiler to output
1495 // byte streams. Encoding classes generate functions which are called by
1496 // Machine Instruction Nodes in order to generate the bit encoding of the
1497 // instruction. Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
1499 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
1500 // operand to generate a function which returns its register number when
1501 // queried. CONST_INTER causes an operand to generate a function which
1502 // returns the value of the constant when queried. MEMORY_INTER causes an
1503 // operand to generate four functions which return the Base Register, the
1504 // Index Register, the Scale Value, and the Offset Value of the operand when
1505 // queried. COND_INTER causes an operand to generate six functions which
1506 // return the encoding code (ie - encoding bits for the instruction)
1507 // associated with each basic boolean condition for a conditional instruction.
1508 // Instructions specify two basic values for encoding. They use the
1509 // ins_encode keyword to specify their encoding class (which must be one of
1510 // the class names specified in the encoding block), and they use the
1511 // opcode keyword to specify, in order, their primary, secondary, and
1512 // tertiary opcode. Only the opcode sections which a particular instruction
1513 // needs for encoding need to be specified.
1514 encode %{
1515 // Build emit functions for each basic byte or larger field in the intel
1516 // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1517 // code in the enc_class source block. Emit functions will live in the
1518 // main source block for now. In future, we can generalize this by
1519 // adding a syntax that specifies the sizes of fields in an order,
1520 // so that the adlc can build the emit functions automagically
1521
1522 // Set instruction mark in MacroAssembler. This is used only in
// instructions that emit bytes directly to the CodeBuffer wrapped
1524 // in the MacroAssembler. Should go away once all "instruct" are
1525 // patched to emit bytes only using methods in MacroAssembler.
  // Record the start of the current instruction in the MacroAssembler.
  enc_class SetInstMark %{
    __ set_inst_mark();
  %}
1529
  // Clear the instruction-start mark set by SetInstMark.
  enc_class ClearInstMark %{
    __ clear_inst_mark();
  %}
1533
1534 // Emit primary opcode
  // Emit $primary, the first opcode byte from the instruct's 'opcode' clause.
  enc_class OpcP %{
    emit_opcode(masm, $primary);
  %}
1538
1539 // Emit secondary opcode
  // Emit $secondary, the second opcode byte from the 'opcode' clause.
  enc_class OpcS %{
    emit_opcode(masm, $secondary);
  %}
1543
1544 // Emit opcode directly
  // Emit an opcode byte supplied as an immediate operand.
  enc_class Opcode(immI d8) %{
    emit_opcode(masm, $d8$$constant);
  %}
1548
  // Emit the 0x66 operand-size override prefix (16-bit operand size).
  enc_class SizePrefix %{
    emit_opcode(masm,0x66);
  %}
1552
  // Emit a mod/rm byte for a register-to-register form (mod == 0x3).
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}
1556
  // Emit an immediate opcode byte followed by a reg-reg mod/rm byte.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(masm,$opcode$$constant);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}
1561
  // Zero a register with MOV r32, 0 (opcode 0xB8+rd with a 32-bit immediate).
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( masm, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
    emit_d32   ( masm, 0x0  );             // imm32==0x0
  %}
1566
  // Sign-extend EAX into EDX before a 32-bit IDIV, guarding the one
  // overflowing combination (min_int / -1) per the JVM spec.
  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax,: quotient  (= rax, idiv reg)       min_int
    //         rdx: remainder (= rax, irem reg)       0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(masm,0x81); emit_d8(masm,0xF8);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x80);                     // cmp rax,80000000h
    emit_opcode(masm,0x0F); emit_d8(masm,0x85);
    emit_opcode(masm,0x0B); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);                     // jne normal_case
    emit_opcode(masm,0x33); emit_d8(masm,0xD2);                     // xor rdx,edx
    emit_opcode(masm,0x83); emit_d8(masm,0xF9); emit_d8(masm,0xFF); // cmp rcx,0FFh
    emit_opcode(masm,0x0F); emit_d8(masm,0x84);
    emit_opcode(masm,0x03); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);                     // je done
    // normal_case:
    emit_opcode(masm,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}
1607
1608 // Dense encoding for older common ops
  // Emit a one-byte instruction whose register is folded into the opcode
  // (e.g. PUSH/POP/MOV r32 forms that use opcode+rd encoding).
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(masm, $opcode$$constant + $reg$$reg);
  %}
1612
1613
// Opcode enc_class for 8/32-bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    // (bit 0x02 selects the sign-extended imm8 form of the instruction).
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(masm, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(masm, $primary);
    }
  %}
1624
  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    // (bit 0x02 selects the sign-extended imm8 form of the instruction).
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(masm, $primary | 0x02); }
    else {                          // If 32-bit immediate
      emit_opcode(masm, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(masm, 0x3, $secondary, $dst$$reg);
  %}
1636
  // Emit the immediate itself, as one byte if it fits in signed 8 bits,
  // otherwise as four bytes (pairs with OpcSE/OpcSErm above).
  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}
1647
  // Immediate-operand ALU op on the LOW word of a long register pair:
  // opcode (imm8 form if the low 32 bits fit), mod/rm, then the immediate.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(masm, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
    else                               emit_d32(masm,con);
  %}
1658
1659 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1660 // Emit primary opcode and set sign-extend bit
1661 // Check for 8-bit immediate, and set sign extend bit in opcode
1662 int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1663 emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1664 // Emit r/m byte with tertiary opcode, after primary opcode.
1665 emit_rm(masm, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
1666 if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
1667 else emit_d32(masm,con);
1668 %}
1669
  // Emit "secondary opcode + register" byte (e.g. BSWAP is 0x0F 0xC8+r;
  // the 0x0F prefix comes from the instruct's primary opcode).
  enc_class OpcSReg (rRegI dst) %{ // BSWAP
    emit_cc(masm, $secondary, $dst$$reg );
  %}

  // Byte-swap a 64-bit value held in a register pair:
  // BSWAP each 32-bit half, then XCHG the halves.
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW_ENC(destlo);
    // bswap lo
    emit_opcode(masm, 0x0F);
    emit_cc(masm, 0xC8, destlo);
    // bswap hi
    emit_opcode(masm, 0x0F);
    emit_cc(masm, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(masm, 0x87);
    emit_rm(masm, 0x3, destlo, desthi);
  %}

  // Register-direct ModRM byte with the /secondary digit — the opcode
  // itself is emitted by the instruct's format/opcode machinery.
  enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ...
    emit_rm(masm, 0x3, $secondary, $div$$reg );
  %}

  // CMOVcc: primary opcode byte, then "secondary + condition code" byte.
  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(masm, $secondary, $cop$$cmpcode);
  %}

  // x87 FCMOVcc ST(0),ST(i): the two instruction bytes are computed from
  // a 0xDA00 base plus condition code plus stack index (src-1).
  // NOTE(review): assumes $cop$$cmpcode is pre-biased for the FCMOV
  // opcode map — confirm against the cmpOp operand definition.
  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(masm, op >> 8 );
    emit_d8(masm, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV (cmpcode^1 flips the
    // low bit of the Jcc opcode, negating the condition).
    emit_cc( masm, 0x70, ($cop$$cmpcode^1) );
    emit_d8( masm, $brOffs$$constant );
  %}
1709
  // Slow-path subtype check with fixed register assignments (EDI result,
  // EAX super, ESI sub, ECX killed). When $primary is set the result
  // register is zeroed on the hit path; the miss label falls through.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     nullptr, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  // Empty the x87 stack before a C call (legacy FPU mode only). The
  // emitted size must be identical at every use site because callers
  // rely on sizeof_FFree_Float_Stack_All for size prediction; the
  // first emission records it, later emissions assert it.
  enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All
    int start = __ offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        __ verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty. Do cleanup now.
      __ empty_FPU_stack();
    }
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = __ offset() - start;
    } else {
      assert(__ offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  // Optional FPU-state sanity check after returning from a leaf call.
  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      __ verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}
1749
  enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    __ set_inst_mark();
    $$$emit8$primary;
    // CALL directly to the runtime; displacement is PC-relative
    // (target minus end-of-instruction, hence the -4 for the imm32).
    emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();

    // In SSE2+ mode a float/double result comes back on the x87 stack
    // (C ABI) but consumers expect it in xmm0: bounce it through a
    // stack temp, or just pop it if the value is unused.
    if (UseSSE >= 2) {
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}

  // Restore standard FPU control word and clear YMM upper halves before
  // a call; the emitted size is asserted against pre_call_resets_size().
  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = __ offset());
    if (ra_->C->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
    debug_only(int off1 = __ offset());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}

  // After the call, re-establish the 24-bit precision control word if
  // this method runs in 24-bit FP mode.
  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    }
  %}
1802
  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    __ set_inst_mark();
    $$$emit8$primary;

    if (!_method) {
      // Runtime stub call: plain PC-relative call with runtime reloc.
      emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_IMM32);
      __ clear_inst_mark();
      __ post_call_nop();
    } else {
      // Real Java call: choose opt-virtual vs static relocation, then
      // emit (or share) the to-interpreter stub for this call site.
      int method_index = resolved_method_index(masm);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                     rspec, RELOC_DISP32);
      __ post_call_nop();
      address mark = __ inst_mark();
      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
        // Calls of the same statically bound method can share
        // a stub to the interpreter.
        __ code()->shared_stub_to_interp_for(_method, __ code()->insts()->mark_off());
        __ clear_inst_mark();
      } else {
        // Emit stubs for static call.
        address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
        __ clear_inst_mark();
        if (stub == nullptr) {
          ciEnv::current()->record_failure("CodeCache is full");
          return;
        }
      }
    }
  %}

  // Inline-cache call: target patched at resolve time.
  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}

  // CALL through the receiver's Method*: indirect call with an 8-bit
  // displacement off EAX.
  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
    __ set_inst_mark();
    $$$emit8$primary;
    emit_rm(masm, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(masm, disp);             // Displacement
    __ clear_inst_mark();
    __ post_call_nop();
  %}
1857
  // Shift-by-immediate: opcode, register-direct ModRM with /secondary
  // digit, then the 8-bit shift count.
  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(masm, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    // MOV r32,imm32 is 0xB8+reg followed by the 32-bit constant.
    emit_opcode(masm, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Same as LdImmI but the "op+reg" base byte comes from $primary.
  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(masm, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load the low 32 bits of a long immediate; a zero constant is
  // loaded with the shorter XOR dst,dst form.
  // NOTE(review): the XOR shortcut clobbers EFLAGS where the MOV form
  // would not — presumably no flags are live across this encoding;
  // confirm at the use sites.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(masm, 0x33);
      emit_rm(masm, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(masm, $primary + dst_enc);
      emit_d32(masm, src_con);
    }
  %}

  // Load the high 32 bits of a long immediate into the pair's high
  // register (encoding convention: high = low + 2, cf. HIGH_FROM_LOW_ENC).
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(masm, 0x33);
      emit_rm(masm, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(masm, $primary + dst_enc);
      emit_d32(masm, src_con);
    }
  %}
1907
1908
  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( masm, $dst$$reg, $src$$reg );
  %}

  // Copy the low word of a long pair into an int register.
  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( masm, $dst$$reg, $src$$reg );
  %}

  // Register-direct ModRM byte only; opcode supplied by the instruct.
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  // Low-half op on a long pair: primary opcode + ModRM of the low regs.
  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  // High-half op on a long pair: secondary opcode + ModRM of the high regs.
  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
  %}

  // Low-half ModRM only (opcode already emitted).
  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  // High-half ModRM only (opcode already emitted).
  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
  %}

  // ModRM pairing an int register with the high half of a long pair.
  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits reinterpreted as a 32-bit integer.
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(masm, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits reinterpreted as a 32-bit integer.
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(masm, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  // Raw 32-bit constant, no relocation.
  enc_class Con_d32(immI src) %{
    emit_d32(masm,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference: [t1 + disp32 of 0]
    emit_rm( masm, 0x00, $t1$$reg, 0x05 );
    emit_d32(masm, 0x00);
  %}

  // LOCK prefix byte for the following instruction.
  enc_class lock_prefix( ) %{
    emit_opcode(masm,0xF0);         // [Lock]
  %}
1981
  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(masm,0x87);
    emit_opcode(masm,0xD9);
    // [Lock]
    emit_opcode(masm,0xF0);
    // CMPXCHG8 [Eptr]  (0x0F 0xC7 /1)
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xC7);
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(masm,0x87);
    emit_opcode(masm,0xD9);
  %}

  // Atomic 32-bit compare-and-exchange: LOCK CMPXCHG [Eptr],ECX
  // (implicit old value in EAX, new value in ECX).
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(masm,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xB1);
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
  %}

  // Atomic 8-bit compare-and-exchange (0x0F 0xB0).
  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(masm,0xF0);

    // CMPXCHGB [Eptr]
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xB0);
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
  %}

  // Atomic 16-bit compare-and-exchange: operand-size prefix + CMPXCHG.
  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(masm,0xF0);

    // 16-bit mode
    emit_opcode(masm, 0x66);

    // CMPXCHGW [Eptr]
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xB1);
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize ZF into a register: res = (ZF set) ? 1 : 0.
  // MOV imm does not touch EFLAGS, so the condition survives the
  // first MOV and the JNE skips the "res = 1" store.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( masm, 0xB8 + res_encoding);
    emit_d32( masm, 0 );
    // JNE,s  fail
    emit_opcode(masm,0x75);
    emit_d8(masm, 5 );
    // MOV  res,1
    emit_opcode( masm, 0xB8 + res_encoding);
    emit_d32( masm, 1 );
    // fail:
  %}
2050
  // Generic reg/mem ModRM+SIB+disp emission for a memory operand,
  // preserving any relocation the displacement needs.
  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // Like RegMem but addresses the high 32-bit half of a long in memory
  // (disp+4) and uses the high register of the pair.  Cannot carry a
  // relocated displacement, since +4 would corrupt an oop reloc.
  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(masm, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Double-shift a long left/right by a constant 1..31: SHLD/SHRD moves
  // bits across the half boundary, then the remaining half is shifted.
  // $tertiary selects direction (0xA4 = SHLD: hi gets bits from lo).
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
    emit_opcode(masm,0x0F);
    emit_opcode(masm,$tertiary);
    emit_rm(masm, 0x3, r1, r2);
    emit_d8(masm,$cnt$$constant);
    emit_d8(masm,$primary);
    emit_rm(masm, 0x3, $secondary, r1);
    emit_d8(masm,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by 32..63: lo = hi, hi = sign of hi
  // (SAR 31), with an extra shift of lo when the count exceeds 32.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( masm, 0x8B ); // Move
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(masm,$primary);
      emit_rm(masm, 0x3, $secondary, $dst$$reg);
      emit_d8(masm,$cnt$$constant-32);
    }
    emit_d8(masm,$primary);
    emit_rm(masm, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_d8(masm,31);
  %}

  // Logical shift of a long by 32..63: move one half into the other,
  // shift it by (cnt-32), and clear the vacated half with XOR.
  // $secondary picks the direction (0x5 = shift right).
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }

    emit_opcode( masm, 0x8B ); // Move r1,r2
    emit_rm(masm, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(masm,$primary);
      emit_rm(masm, 0x3, $secondary, r1);
      emit_d8(masm,$cnt$$constant-32);
    }
    emit_opcode(masm,0x33);  // XOR r2,r2
    emit_rm(masm, 0x3, r2, r2);
  %}
2112
  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(masm,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  // Same as RMopc_Mem_no_oop but the displacement may carry relocation
  // (e.g. disp-as-oop for static globals).
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  // LEA dst,[src0+src1]: base register plus constant displacement,
  // no index, no scale, never relocated.
  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // dst = min(dst,src): compare, jump over the move when dst < src
  // (JL with a 2-byte skip), else move src into dst.
  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(masm,0x3B);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(masm,0x7C);
    emit_d8(masm,2);
    // move dst,src
    emit_opcode(masm,0x8B);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}

  // dst = max(dst,src): compare, jump over the move when dst > src
  // (JG with a 2-byte skip), else move src into dst.
  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(masm,0x3B);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(masm,0x7F);
    emit_d8(masm,2);
    // move dst,src
    emit_opcode(masm,0x8B);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}
2184
  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();       // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( masm, 0xD9 ); // FLD (i.e., push it)
      emit_d8( masm, 0xC0-1+$src$$reg );
    }
    __ set_inst_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(masm,$primary);
    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
    __ clear_inst_mark();
  %}

  // Two's-complement negate: NEG r32 is 0xF7 /3.
  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(masm,0xF7);
    emit_rm(masm, 0x3, 0x03, $dst$$reg );
  %}

  // SETL on the low byte of dst (0x0F 0x9C /4-form ModRM).
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0x9C);
    emit_rm( masm, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free "p += (p < q) ? y : 0": SUB sets carry when p < q,
  // SBB turns the carry into an all-ones/all-zeros mask in tmp,
  // AND selects y or 0, ADD applies it.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(masm,0x2B);
    emit_rm(masm, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(masm,0x1B);
    emit_rm(masm, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(masm,0x23);
    emit_rm(masm, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(masm,0x03);
    emit_rm(masm, 0x3, $p$$reg, tmpReg);
  %}
2234
  // Variable left shift of a long by ECX.  x86 shifts mask the count to
  // 5 bits, so counts >= 32 are handled explicitly: when bit 5 of the
  // count is set, lo is moved into hi and lo is cleared first, then the
  // SHLD/SHL pair handles the remaining (count mod 32) bits.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(masm,0xF7);
    emit_rm(masm, 0x3, 0, ECX_enc);
    emit_d32(masm,0x20);
    // JEQ,s small
    emit_opcode(masm, 0x74);
    emit_d8(masm, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(masm, 0x33);
    emit_rm(masm, 0x3, $dst$$reg, $dst$$reg);
// small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xA5);
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    // SHL  $dst.lo,$shift
    emit_opcode(masm,0xD3);
    emit_rm(masm, 0x3, 0x4, $dst$$reg );
  %}

  // Variable logical right shift of a long by ECX; mirror image of
  // shift_left_long (hi moved into lo, hi cleared, then SHRD/SHR).
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(masm,0xF7);
    emit_rm(masm, 0x3, 0, ECX_enc);
    emit_d32(masm,0x20);
    // JEQ,s small
    emit_opcode(masm, 0x74);
    emit_d8(masm, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(masm, 0x33);
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xAD);
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SHR  $dst.hi,$shift
    emit_opcode(masm,0xD3);
    emit_rm(masm, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
  %}

  // Variable arithmetic right shift of a long by ECX; like
  // shift_right_long but the vacated high half is filled with the sign
  // (SAR hi,31) instead of zero, so the >=32 prologue is 5 bytes.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(masm,0xF7);
    emit_rm(masm, 0x3, 0, ECX_enc);
    emit_d32(masm,0x20);
    // JEQ,s small
    emit_opcode(masm, 0x74);
    emit_d8(masm, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(masm, 0xC1);
    emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
    emit_d8(masm, 0x1F );
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xAD);
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift
    emit_opcode(masm,0xD3);
    emit_rm(masm, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
  %}
2307
2308
  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(masm, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( masm, 0xDD );
    emit_d8( masm, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  // FSTP ST(i): store TOS into dst and pop the x87 stack.
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( masm, 0xDD );           // FSTP   ST(i)
    emit_d8( masm, 0xD8+$dst$$reg );
  %}

  // FLD ST(i-1): push a copy of dst onto the x87 stack.
  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( masm, 0xD9 );
    emit_d8( masm, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // Multiply dst by the first strictfp scaling constant (extended-real
  // load from the subnormal-bias table, then FMULP into dst).
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( masm, 0xDB );           // FLD m80real
    emit_opcode( masm, 0x2D );
    emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
    emit_opcode( masm, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( masm, 0xC8+$dst$$reg );
  %}

  // Inverse scaling of strictfp_bias1, using the second bias constant.
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( masm, 0xDB );           // FLD m80real
    emit_opcode( masm, 0x2D );
    emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
    emit_opcode( masm, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( masm, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( masm, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( masm, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( masm, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(masm, $dst$$disp);   // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( masm, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( masm, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( masm, 0xDD, 0x03, $dst$$disp );
  %}

  // FSTP ST(i): store TOS into dst and pop the x87 stack.
  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( masm, 0xDD );           // FSTP   ST(i)
    emit_d8( masm, 0xD8+$dst$$reg );
  %}

  // FLD ST(i-1): push a copy of dst onto the x87 stack.
  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( masm, 0xD9 );           // FLD    ST(i-1)
    emit_d8( masm, 0xC0-1+$dst$$reg );
  %}
2387
  // Push FPU's float to a stack-slot, and pop FPU-stack
  // If src is already TOS (FPR1), a plain FST suffices; otherwise the
  // value is FLDed first and the store pops the extra copy (FSTP).
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( masm, 0xD9 );         // FLD    ST(i-1)
      emit_d8( masm, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( masm, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  // Same FST-vs-FLD+FSTP trick as Pop_Mem_Reg_FPR, double-width opcode.
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( masm, 0xD9 );         // FLD    ST(i-1)
      emit_d8( masm, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( masm, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  // Register-to-register variant: FST ST(i) when src is already TOS,
  // else FLD src then FSTP ST(i).
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( masm, 0xD9 );         // FLD    ST(src-1)
      emit_d8( masm, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( masm, 0xDD );
    emit_d8( masm, pop+$dst$$reg );      // FST<P> ST(i)
  %}

  // Load dst to TOS for a read-modify-write x87 op.  When src is not
  // FPR1 it must be rotated into position: fincstp shifts the stack
  // view, FXCH swaps, fdecstp restores the stack pointer.
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( masm, 0xD9 );
    emit_d8( masm, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (masm, 0xD9);
      emit_opcode (masm, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(masm, 0xD9);
      emit_d8(masm, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (masm, 0xD9);
      emit_opcode (masm, 0xF6);
    }
  %}
2440
  // Move two XMM doubles onto the x87 stack (src0 ends up in ST(0))
  // via an 8-byte stack temp, for x87-only ops like fprem.
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Float variant of Push_ModD_encoding (4-byte stack temp).
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Move the x87 TOS double into an XMM register and release the
  // 8-byte stack temp.
  enc_class Push_ResultD(regD dst) %{
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Move the x87 TOS float into an XMM register; the stack temp size
  // to release is passed in as $d8.
  enc_class Push_ResultF(regF dst, immI d8) %{
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  // Push a single XMM double onto the x87 stack via a fresh 8-byte temp.
  enc_class Push_SrcD(regD src) %{
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Reserve an 8-byte stack temp for XMM <-> x87 transfers.
  enc_class push_stack_temp_qword() %{
    __ subptr(rsp, 8);
  %}

  // Release the 8-byte stack temp.
  enc_class pop_stack_temp_qword() %{
    __ addptr(rsp, 8);
  %}

  // Spill an XMM double into the existing stack temp and load it onto
  // the x87 stack (temp must already be reserved).
  enc_class push_xmm_to_fpr1(regD src) %{
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}
2487
  // Rotate the result back into src's stack position after a
  // read-modify-write x87 op (inverse of Push_Reg_Mod_DPR's shuffle):
  // fincstp / FXCH src / fdecstp, skipped when src is already FPR1.
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (masm, 0xD9);
      emit_opcode (masm, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(masm, 0xD9);
      emit_d8(masm, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (masm, 0xD9);
      emit_opcode (masm, 0xF6);
    }
  %}

  // Copy x87 status into EFLAGS (fnstsw ax; sahf) and branch over the
  // next 5 bytes when the parity flag is clear (ordered comparison).
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( masm, 0xDF );
    emit_opcode( masm, 0xE0 );
    // sahf
    emit_opcode( masm, 0x9E );
    // jnp  ::skip
    emit_opcode( masm, 0x7B );
    emit_opcode( masm, 0x05 );
  %}

  // FPREM loop: the instruction only reduces partially, so repeat while
  // the C2 status bit (surfaced as parity after sahf) says "incomplete".
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( masm, 0xD9 );
    emit_opcode( masm, 0xF8 );
    // wait
    emit_opcode( masm, 0x9b );
    // fnstsw ax
    emit_opcode( masm, 0xDF );
    emit_opcode( masm, 0xE0 );
    // sahf
    emit_opcode( masm, 0x9E );
    // jp ::loop  (near Jcc with -12 displacement, back to fprem)
    emit_opcode( masm, 0x0F );
    emit_opcode( masm, 0x8A );
    emit_opcode( masm, 0xF4 );
    emit_opcode( masm, 0xFF );
    emit_opcode( masm, 0xFF );
    emit_opcode( masm, 0xFF );
  %}

  // Translate x87 comparison status into EFLAGS, forcing the unordered
  // (NaN) case to look like "less than" by setting the carry flag.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( masm, 0xDF);
    emit_opcode( masm, 0xE0);
    // test ax,0x0400
    emit_opcode( masm, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( masm, 0xA9 );
    emit_d16   ( masm, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( masm, 0xA9 );
    // emit_d32   ( masm, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( masm, 0x74 );
    emit_d8    ( masm, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( masm, 0xB4 );
    emit_d8    ( masm, 0x01 );
    // sahf
    emit_opcode( masm, 0x9E);
  %}
2557
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( masm, 0x7B );
    emit_d8    ( masm, 0x03 );
    // MOV ah,1 - treat as LT case (set carry flag)
    emit_opcode( masm, 0xB4 );
    emit_d8    ( masm, 0x01 );
    // SAHF
    emit_opcode( masm, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( masm, 0x90);
  %}

// fnstsw_ax();
// sahf();
// movl(dst, nan_result);
// jcc(Assembler::parity, exit);
// movl(dst, less_result);
// jcc(Assembler::below, exit);
// movl(dst, equal_result);
// jcc(Assembler::equal, exit);
// movl(dst, greater_result);

// less_result     =  1;
// greater_result  = -1;
// equal_result    = 0;
// nan_result      = -1;

  // Three-way float compare result: dst = -1 (NaN or less), 0 (equal),
  // or 1 (greater), per the pseudocode above.  The jump displacements
  // (0x13/0x0C/0x05) all target the common exit after the last MOV;
  // MOV imm does not disturb the flags between the jumps.
  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( masm, 0xDF);
    emit_opcode( masm, 0xE0);
    // sahf
    emit_opcode( masm, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( masm, 0xB8 + $dst$$reg);
    emit_d32( masm, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( masm, 0x7A );
    emit_d8    ( masm, 0x13 );
    // movl(dst, less_result);
    emit_opcode( masm, 0xB8 + $dst$$reg);
    emit_d32( masm, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( masm, 0x72 );
    emit_d8    ( masm, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( masm, 0xB8 + $dst$$reg);
    emit_d32( masm, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( masm, 0x74 );
    emit_d8    ( masm, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( masm, 0xB8 + $dst$$reg);
    emit_d32( masm, 1 );
  %}
2616
2617
  // Compare the longs and set flags
  // BROKEN! Do Not use as-is
  // NOTE(review): compares the high words, then (only if equal) the low
  // words.  Flags left by the low-word CMP reflect an unsigned 32-bit
  // compare, so signed 64-bit LT/GT conditions are wrong -- hence BROKEN.
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP $src1.hi,$src2.hi
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
    // JNE,s done  (short jump over the 2-byte low-word CMP)
    emit_opcode(masm,0x75);
    emit_d8(masm, 2 );
    // CMP $src1.lo,$src2.lo
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}
2632
  // Sign-extend the 32-bit int $src into the long pair $dst:
  // dst.lo = src; dst.hi = src >> 31 (arithmetic shift replicates the sign).
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( masm, dst_encoding , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( masm, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( masm, 0xC1 );
    emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
    emit_d8(masm, 0x1F );
  %}
2645
  // Convert a long to a double: push the 64-bit value (hi then lo so it lies
  // little-endian in memory), FILD the qword at [ESP] onto the FPU stack,
  // then pop the two pushed words back off the CPU stack.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(masm, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(masm,0xdf);
    emit_d8(masm, 0x6C);
    emit_d8(masm, 0x24);
    emit_d8(masm, 0x00);
    // pop stack
    emit_opcode(masm, 0x83); // add SP, #8
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, 0x8);
  %}
2661
  // High half of a widening multiply followed by a right shift of 32..63:
  // one-operand IMUL leaves the 64-bit product in EDX:EAX, so after shifting
  // EDX ($dst) right by cnt-32 it holds (product >> cnt).  The other operand's
  // low word is expected in EAX (eADXRegL_low_only).  When cnt == 32 the
  // shift count is zero and the SAR is omitted entirely.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL EDX:EAX,$src1
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x5, $src1$$reg );
    // SAR EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(masm, 0xC1);
      emit_rm(masm, 0x3, 7, $dst$$reg );
      emit_d8(masm, shift_count);
    }
  %}
2674
  // this version doesn't have add sp, 8
  // Same as convert_long_double but leaves the two pushed words on the CPU
  // stack; the consumer of this encoding is responsible for the ESP adjust.
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(masm, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(masm,0xdf);
    emit_d8(masm, 0x6C);
    emit_d8(masm, 0x24);
    emit_d8(masm, 0x00);
  %}
2687
  // Widening signed multiply: one-operand IMUL (F7 /5) multiplies EAX by
  // $src and leaves the full 64-bit product in EDX:EAX ($dst).
  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x5, $src$$reg);
  %}
2694
  // Widening unsigned multiply: one-operand MUL (F7 /4) multiplies EAX by
  // $src and leaves the full 64-bit product in EDX:EAX ($dst).
  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x4, $src$$reg);
  %}
2701
  // Full 64x64->64 multiply with $dst in EDX:EAX.  The two 32-bit cross
  // products are accumulated in $tmp, the unsigned low product comes from
  // the one-operand MUL, and the cross terms are folded into the high word.
  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV $tmp,$src.lo
    encode_Copy( masm, $tmp$$reg, $src$$reg );
    // IMUL $tmp,EDX   -- tmp = src.lo * dst.hi
    emit_opcode( masm, 0x0F );
    emit_opcode( masm, 0xAF );
    emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // MOV EDX,$src.hi
    encode_Copy( masm, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
    // IMUL EDX,EAX    -- EDX = src.hi * dst.lo
    emit_opcode( masm, 0x0F );
    emit_opcode( masm, 0xAF );
    emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
    // ADD $tmp,EDX    -- tmp = sum of both cross products
    emit_opcode( masm, 0x03 );
    emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // MUL EDX:EAX,$src.lo  -- unsigned low product
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x4, $src$$reg );
    // ADD EDX,ESI     -- fold cross products into the high word
    emit_opcode( masm, 0x03 );
    emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
  %}
2727
  // Multiply the long in EDX:EAX by a small constant (0..127, so the
  // three-operand IMUL imm8 form 0x6B can be used for the cross term).
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    // hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL $tmp,EDX,$src  -- tmp = dst.hi * constant
    emit_opcode( masm, 0x6B );
    emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    emit_d8( masm, (int)$src$$constant );
    // MOV EDX,$src        -- load constant for the widening MUL
    emit_opcode(masm, 0xB8 + EDX_enc);
    emit_d32( masm, (int)$src$$constant );
    // MUL EDX:EAX,EDX     -- unsigned low product: EAX * constant
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x4, EDX_enc );
    // ADD EDX,ESI         -- fold cross term into the high word
    emit_opcode( masm, 0x03 );
    emit_rm( masm, 0x3, EDX_enc, $tmp$$reg );
  %}
2745
  // Long division via the runtime: push both 64-bit operands (hi word then
  // lo word each), call SharedRuntime::ldiv, then pop the 16 argument bytes.
  // NOTE(review): HIGH_FROM_LOW_ENC is applied to the whole 0x50+reg opcode
  // byte; this is only correct if the macro is a simple additive offset on
  // the register encoding -- confirm against its definition.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(masm, 0x50+$src1$$reg );
    // PUSH src2.hi
    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(masm, 0x50+$src2$$reg );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);       // Call into runtime
    emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Restore stack
    emit_opcode(masm, 0x83); // add SP, #framesize
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, 4*4);      // pop the four pushed argument words
  %}
2766
  // Long remainder via the runtime: identical shape to long_div above but
  // calls SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(masm, 0x50+$src1$$reg );
    // PUSH src2.hi
    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(masm, 0x50+$src2$$reg );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);       // Call into runtime
    emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Restore stack
    emit_opcode(masm, 0x83); // add SP, #framesize
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, 4*4);      // pop the four pushed argument words
  %}
2787
  // Test a long against zero: $tmp = src.lo | src.hi, so ZF is set iff the
  // whole 64-bit value is zero.  $tmp is clobbered.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV $tmp,$src.lo
    emit_opcode(masm, 0x8B);
    emit_rm(masm, 0x3, $tmp$$reg, $src$$reg);
    // OR $tmp,$src.hi
    emit_opcode(masm, 0x0B);
    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
  %}
2796
  // Compare two longs word by word: CMP the low words, short-circuit past
  // the high-word CMP if they already differ.  Intended for EQ/NE tests --
  // signed ordering is presumably not valid after this sequence (the flags
  // may come from an isolated low-word compare); confirm against users.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP $src1.lo,$src2.lo
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s skip  (emit_cc 0x70+0x5 == 0x75)
    emit_cc(masm, 0x70, 0x5);
    emit_d8(masm,2);
    // CMP $src1.hi,$src2.hi
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
  %}
2808
  // Signed 64-bit compare of $src1 vs $src2 using CMP (low) / SBB (high):
  // the borrow from the low-word subtract is carried into the high-word
  // subtract, so the final flags describe src1 - src2 as a 64-bit value.
  // $tmp receives the high-word difference and is clobbered.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
    // MOV $tmp,$src1.hi
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
    // SBB $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( masm, 0x1B );
    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
  %}
2820
  // Compare zero against the long $src: zero $tmp, then CMP/SBB computes
  // 0 - src across both words, leaving flags for a 64-bit "0 cmp src".
  // $tmp is clobbered.
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR $tmp,$tmp
    emit_opcode(masm,0x33);  // XOR
    emit_rm(masm,0x3, $tmp$$reg, $tmp$$reg);
    // CMP $tmp,$src.lo
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, $tmp$$reg, $src$$reg );
    // SBB $tmp,$src.hi
    emit_opcode( masm, 0x1B );
    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
  %}
2832
  // Sniff, sniff... smells like Gnu Superoptimizer
  // Two's-complement negate of a 64-bit value in place:
  // NEG hi; NEG lo; SBB hi,0 -- the SBB corrects the high word by the
  // borrow generated when the low word is non-zero.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(masm,0xF7);    // NEG hi
    emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_opcode(masm,0xF7);    // NEG lo
    emit_rm    (masm,0x3, 0x3, $dst$$reg );
    emit_opcode(masm,0x83);    // SBB hi,0
    emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_d8    (masm,0 );
  %}
2843
  // POP EDX (single-byte opcode 0x5A).
  enc_class enc_pop_rdx() %{
    emit_opcode(masm,0x5A);
  %}
2847
  // Tail-jump (JMP rel32) to the rethrow stub, with a runtime-call
  // relocation so the target survives code movement.
  enc_class enc_rethrow() %{
    __ set_inst_mark();
    emit_opcode(masm, 0xE9);        // jmp    entry
    emit_d32_reloc(masm, (int)OptoRuntime::rethrow_stub() - ((int)__ pc())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
  %}
2856
2857
  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // thrown an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(masm,0xD9);            // FLDCW  trunc
    emit_opcode(masm,0x2D);
    emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(masm,0x83);            // SUB ESP,4
    emit_opcode(masm,0xEC);
    emit_d8(masm,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(masm,0xDB);            // FISTP [ESP]
    emit_opcode(masm,0x1C);
    emit_d8(masm,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(masm,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(masm,0x2D);
    emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack.  0x80000000 is the value
    // the FPU stores for out-of-range/NaN input, so it selects the slow path.
    emit_opcode(masm,0x58);            // POP EAX
    emit_opcode(masm,0x3D);            // CMP EAX,imm
    emit_d32   (masm,0x80000000);      //         0x80000000
    emit_opcode(masm,0x75);            // JNE around_slow_call
    emit_d8    (masm,0x07);            // Size of slow_call
    // Push src onto stack slow-path: reload the operand for the wrapper
    emit_opcode(masm,0xD9 );           // FLD     ST(i)
    emit_d8    (masm,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);       // Call into runtime
    emit_d32_reloc(masm, (StubRoutines::x86::d2i_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Carry on here...
  %}
2907
  // Convert a double to a long.  Same scheme as DPR2I_encoding above but
  // stores a 64-bit FISTP; the sentinel for the corner cases is the pair
  // EDX == 0x80000000 && EAX == 0, which routes to the d2l wrapper.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(masm,0xD9);            // FLDCW  trunc
    emit_opcode(masm,0x2D);
    emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(masm,0x83);            // SUB ESP,8
    emit_opcode(masm,0xEC);
    emit_d8(masm,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(masm,0xDF);            // FISTP [ESP]
    emit_opcode(masm,0x3C);
    emit_d8(masm,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(masm,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(masm,0x2D);
    emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(masm,0x58);            // POP EAX
    emit_opcode(masm,0x5A);            // POP EDX
    emit_opcode(masm,0x81);            // CMP EDX,imm
    emit_d8    (masm,0xFA);            // rdx
    emit_d32   (masm,0x80000000);      //         0x80000000
    emit_opcode(masm,0x75);            // JNE around_slow_call
    emit_d8    (masm,0x07+4);          // Size of slow_call
    emit_opcode(masm,0x85);            // TEST EAX,EAX
    emit_opcode(masm,0xC0);            // 2/rax,/rax,
    emit_opcode(masm,0x75);            // JNE around_slow_call
    emit_d8    (masm,0x07);            // Size of slow_call
    // Push src onto stack slow-path: reload the operand for the wrapper
    emit_opcode(masm,0xD9 );           // FLD     ST(i)
    emit_d8    (masm,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);       // Call into runtime
    emit_d32_reloc(masm, (StubRoutines::x86::d2l_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Carry on here...
  %}
2951
  // FMUL ST,ST(src1): multiply the FPU stack top by register ST(i).
  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC8 + $src1$$reg);
  %}
2958
  // FADD ST,ST(src2): add register ST(i) into the FPU stack top
  // (non-popping form; D8 C0+i encodes FADD, not FADDP).
  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,$src2  /* D8 C0+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}
2965
  // FADDP ST(src2),ST: add stack top into ST(i) and pop the FPU stack.
  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(masm, 0xDE);
    emit_opcode(masm, 0xC0 + $src2$$reg);
  %}
2971
  // Fused subtract-then-divide on the FPU stack top:
  // ST = (ST - ST(src1)) / ST(src2).
  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1         /* D8 E0+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xE0 + $src1$$reg);

    // FDIV   ST,$src2         /* D8 F0+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xF0 + $src2$$reg);
  %}
2982
  // Add-then-multiply on the FPU stack top: ST = (ST + ST(src1)) * ST(src2).
  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC8 + $src2$$reg);
  %}
2993
2994
  // Add-then-multiply, popping variant: ST = ST + ST(src1), then
  // FMULP folds the result into ST(src2) and pops the FPU stack.
  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(masm, 0xDE);
    emit_opcode(masm, 0xC8 + $src2$$reg);
  %}
3005
  // Atomically load the volatile long: a single 64-bit FILD (DF /5) from
  // $mem pulls the value through the FPU, then FISTP (DF /7) stores it into
  // the stack slot $dst -- both transfers are indivisible 64-bit accesses.
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(masm,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( masm, 0x0DF, 0x07, $dst$$disp );
  %}
3018
  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( masm, 0x0DF, 0x05, $src$$disp );   // FILD the long from the stack slot
    __ set_inst_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(masm,0xDF);        // FISTP (DF /7) the 64-bit value into $mem
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    __ clear_inst_mark();
  %}
3036
3037 %}
3038
3039
3040 //----------FRAME--------------------------------------------------------------
3041 // Definition of frame structure and management information.
3042 //
3043 // S T A C K L A Y O U T Allocators stack-slot number
3044 // | (to get allocators register number
3045 // G Owned by | | v add OptoReg::stack0())
3046 // r CALLER | |
3047 // o | +--------+ pad to even-align allocators stack-slot
3048 // w V | pad0 | numbers; owned by CALLER
3049 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
3050 // h ^ | in | 5
3051 // | | args | 4 Holes in incoming args owned by SELF
3052 // | | | | 3
3053 // | | +--------+
3054 // V | | old out| Empty on Intel, window on Sparc
3055 // | old |preserve| Must be even aligned.
3056 // | SP-+--------+----> Matcher::_old_SP, even aligned
3057 // | | in | 3 area for Intel ret address
3058 // Owned by |preserve| Empty on Sparc.
3059 // SELF +--------+
3060 // | | pad2 | 2 pad to align old SP
3061 // | +--------+ 1
3062 // | | locks | 0
3063 // | +--------+----> OptoReg::stack0(), even aligned
3064 // | | pad1 | 11 pad to align new SP
3065 // | +--------+
3066 // | | | 10
3067 // | | spills | 9 spills
3068 // V | | 8 (pad0 slot for callee)
3069 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
3070 // ^ | out | 7
3071 // | | args | 6 Holes in outgoing args owned by CALLEE
3072 // Owned by +--------+
3073 // CALLEE | new out| 6 Empty on Intel, window on Sparc
3074 // | new |preserve| Must be even-aligned.
3075 // | SP-+--------+----> Matcher::_new_SP, even aligned
3076 // | | |
3077 //
3078 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
3079 // known from SELF's arguments and the Java calling convention.
3080 // Region 6-7 is determined per call site.
3081 // Note 2: If the calling convention leaves holes in the incoming argument
3082 // area, those holes are owned by SELF. Holes in the outgoing area
3083 // are owned by the CALLEE. Holes should not be necessary in the
3084 // incoming area, as the Java calling convention is completely under
3085 // the control of the AD file. Doubles can be sorted and packed to
3086 // avoid holes. Holes in the outgoing arguments may be necessary for
3087 // varargs C calling conventions.
3088 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
3089 // even aligned with pad0 as needed.
3090 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
3091 // region 6-11 is even aligned; it may be padded out more so that
3092 // the region from SP to FP meets the minimum stack alignment.
3093
frame %{
  // This register defines part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of C & interpreter return values
  // The lo/hi arrays are indexed by ideal register type (Op_RegI..Op_RegL);
  // OptoReg::Bad in hi[] marks single-slot results.
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // Note: Java float results use XMM0 already at UseSSE>=1, unlike the
  // C convention above which switches at UseSSE>=2.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}
3160
3161 //----------ATTRIBUTES---------------------------------------------------------
3162 //----------Operand Attributes-------------------------------------------------
op_attrib  op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                                            // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction
3175
3176 //----------OPERANDS-----------------------------------------------------------
3177 // Operand definitions must precede instruction definitions for correct parsing
3178 // in the ADLC because operands constitute user defined types which are used in
3179 // instruction definitions.
3180
3181 //----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate (any 32-bit int constant)
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes (SIB shift amounts 0..3)
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3230
// Signed 8-bit immediate (fits the sign-extended imm8 instruction forms)
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned 8-bit immediate (0..255)
operand immU8() %{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Signed 16-bit immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3268
// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count staying within one 32-bit word
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long shift count that crosses into the high word
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3334
// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Null Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value representable as a sign-extended 32-bit int
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}
3413
// Double Immediate zero (x87 path, UseSSE<=1)
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one (x87 path)
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (x87 path)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 path)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero (SSE2 path); bit-pattern test excludes -0.0 and NaN
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero (x87 path)
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one (x87 path)
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (x87 path)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE path)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0 (bit-pattern test)
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3516
// Immediates for special shifts (sign extend)

// Constants for sign-extending shifts (shl/sar by 16 or 24)
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3553
// Vector-mask register operand (matches ideal RegVectMask)
operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}
3561
// Register Operands
// Integer Register: any allocatable 32-bit GPR
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register: only the byte-addressable GPRs (EAX/EBX/ECX/EDX)
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}
3591
// Special Registers: operands pinned to one specific GPR
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}
3638
// Integer register excluding EAX and EDX.
// NOTE(review): presumably for use around instructions that implicitly
// clobber the EAX:EDX pair (mul/div) — confirm at the use sites.
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding ECX.
// NOTE(review): presumably keeps ECX free for its implicit role as the
// shift count register — confirm at the use sites.
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}
3662
// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
// Fixed-register operand pinning allocation to ESI.
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}
3673
// Pointer Register
// Widest pointer operand: any register, including ones normally excluded
// from general allocation.
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

// General-purpose pointer register, allocated from the integer class.
operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Identical to eRegP (same constraint and match list); kept as a separate
// name so rules shared with other platforms can refer to rRegP.
operand rRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}
3711
// On windows95, EBP is not safe to use for implicit null tests.
// The high op_cost steers the allocator toward plain eRegP when EBP would
// otherwise be acceptable.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}
3725
// Pointer register restricted to the p_reg class (EBX/EDX/ESI/EDI per the
// match list below).
operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}
3737
// Special Registers
// Fixed-register pointer operands.
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}
3784
// Long (64-bit) values are held in pairs of 32-bit registers.
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

// Fixed EDX:EAX pair (implicit operands of mul/div).
operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

// Fixed EBX:ECX pair.
operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Fixed EBP:EDI pair (ebpd_reg class).
operand eBDPRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebpd_reg));
  match(reg);

  format %{ "EBP:EDI" %}
  interface(REG_INTER);
%}
// Special case for integer high multiply
// Allocated as the EDX:EAX pair, but the format shows only EAX.
// NOTE(review): presumably only the low register carries the live value
// here — confirm against the mul-high rules that use this operand.
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}
3825
// Flags register, used as output of compare instructions
// rFlagsReg and eFlagsReg below are identical definitions; rFlagsReg is the
// platform-neutral name, eFlagsReg the historical 32-bit name.
operand rFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// predicate(false): never produced by matching — only created explicitly
// by rules that set carry/zero directly.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
// The three variants distinguish which condition pairs the long-compare
// sequence left valid in the flags.
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}
3901
// Float register operands
// x87 floating-point stack registers; used only when SSE2 is not available
// (all of these carry predicate UseSSE < 2).
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

// Top-of-stack x87 register.
operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

// Any x87 double register except FPR1.
operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}
3955
// XMM Float register operands
// NOTE(review): regF and legRegF are currently identical (both allocate
// from float_reg_legacy) — confirm this is intentional on 32-bit.
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

operand legRegF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
// "vl" class — presumably VL-encodable registers; no UseSSE predicate here.
operand vlRegF() %{
   constraint(ALLOC_IN_RC(float_reg_vl));
   match(RegF);

   format %{ %}
   interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand legRegD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

operand vlRegD() %{
   constraint(ALLOC_IN_RC(double_reg_vl));
   match(RegD);

   format %{ %}
   interface(REG_INTER);
%}
4007
//----------Memory Operands----------------------------------------------------
// In the MEMORY_INTER encodings below, index(0x4) means "no index" (0x4 is
// ESP, which cannot be an index register), matching the stack-slot operands
// later in this file.

// Direct Memory Operand
// Absolute address from a pointer constant; base(0xFFFFFFFF) means no base
// register.
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// Note the reversed match: pointer-constant base plus integer register.
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}
4102
4103 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
4105 // // -------------------------------------------------------------------------
4106 // // Scaled Memory Operands
4107 // // Indirect Memory Times Scale Plus Offset Operand
4108 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4109 // match(AddP off (LShiftI ireg scale));
4110 //
4111 // op_cost(10);
4112 // format %{"[$off + $ireg << $scale]" %}
4113 // interface(MEMORY_INTER) %{
4114 // base(0x4);
4115 // index($ireg);
4116 // scale($scale);
4117 // disp($off);
4118 // %}
4119 // %}
4120
// Indirect Memory Times Scale Plus Index Register
// The scale is an immI2 operand (shift amount encoded in the SIB byte).
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}
4148
4149 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
4151 // the first word of the long. If the load-long destination overlaps with
4152 // registers used in the addressing expression, the 2nd half will be loaded
4153 // from a clobbered address. Fix this by requiring that load-long use
4154 // address registers that do not overlap with the load-long target.
4155
// load-long support
// Restricting the address base to ESI guarantees it cannot overlap the
// long destination pair, so the address survives between the two 32-bit
// loads (see the rationale comment above).
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Addressing modes permitted for the load-long idiom.
opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4194
4195
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
// All five variants encode [ESP + disp]: base 0x4 is ESP, index 0x4 is
// "no index".
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
4259
4260 //----------Conditional Branch Operands----------------------------------------
4261 // Comparison Op - This is the operation of the comparison, and is limited to
4262 // the following set of codes:
4263 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4264 //
4265 // Other attributes of the comparison, such as unsignedness, are specified
4266 // by the comparison instruction that sets a condition code flags register.
4267 // That result is represented by a flags operand whose subtype is appropriate
4268 // to the unsignedness (etc.) of the comparison.
4269 //
4270 // Later, the instruction which matches both the Comparison Op (a Bool) and
4271 // the flags (produced by the Cmp) specifies the coding of the comparison op
4272 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4273
// Comparison Code
// Signed comparison; the hex values are the x86 condition-code nibbles used
// to form Jcc/SETcc/CMOVcc encodings, paired with their assembler suffixes.
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case.
// The predicate limits this to the orderings (lt/ge/le/gt) where the
// unordered result already lands on the correct side.
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
4329
4330
// Floating comparisons that can be fixed up with extra conditional jumps.
// Restricted by the predicate to eq/ne, where the unordered case needs the
// extra jump.
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
// NOTE(review): the three-nibble values here are presumably FCMOVcc opcode
// byte fragments rather than condition-code nibbles — confirm against the
// fcmov encoding blocks that consume them.
operand cmpOp_fcmov() %{
  match(Bool);

  // overflow/no_overflow have no fcmov form, so exclude them up front.
  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
// Mirror image of cmpOp: less<->greater and le<->ge are swapped, for use
// when the comparison operands have been commuted.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares
// Mirror image of cmpOpU, for commuted operands.
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
4401
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
// Hence indOffset32X (pointer-constant offset) is omitted from this class.
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);
4417
4418
4419 //----------PIPELINE-----------------------------------------------------------
4420 // Rules which define the behavior of the target architectures pipeline.
4421 pipeline %{
4422
4423 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable-sized (CISC) instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
4434
//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
// The OR'd names (DECODE, MEM, ALU) are composite resources: a use of the
// composite may be satisfied by any one of its members.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);
4453
4454 //----------PIPELINE CLASSES---------------------------------------------------
4455 // Pipeline Classes describe the stages in which input and output are
4456 // referenced by the hardware pipeline.
4457
4458 // Naming convention: ialu or fpu
4459 // Then: _reg
4460 // Then: _reg if there is a 2nd register
4461 // Then: _long if it's a pair of instructions implementing a long
4462 // Then: _fat if it requires the big decoder
4463 // Or: _mem if it requires the big decoder and a memory unit.
4464
4465 // Integer ALU reg operation
4466 pipe_class ialu_reg(rRegI dst) %{
4467 single_instruction;
4468 dst : S4(write);
4469 dst : S3(read);
4470 DECODE : S0; // any decoder
4471 ALU : S3; // any alu
4472 %}
4473
4474 // Long ALU reg operation
4475 pipe_class ialu_reg_long(eRegL dst) %{
4476 instruction_count(2);
4477 dst : S4(write);
4478 dst : S3(read);
4479 DECODE : S0(2); // any 2 decoders
4480 ALU : S3(2); // both alus
4481 %}
4482
4483 // Integer ALU reg operation using big decoder
4484 pipe_class ialu_reg_fat(rRegI dst) %{
4485 single_instruction;
4486 dst : S4(write);
4487 dst : S3(read);
4488 D0 : S0; // big decoder only
4489 ALU : S3; // any alu
4490 %}
4491
4492 // Long ALU reg operation using big decoder
4493 pipe_class ialu_reg_long_fat(eRegL dst) %{
4494 instruction_count(2);
4495 dst : S4(write);
4496 dst : S3(read);
4497 D0 : S0(2); // big decoder only; twice
4498 ALU : S3(2); // any 2 alus
4499 %}
4500
4501 // Integer ALU reg-reg operation
4502 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4503 single_instruction;
4504 dst : S4(write);
4505 src : S3(read);
4506 DECODE : S0; // any decoder
4507 ALU : S3; // any alu
4508 %}
4509
4510 // Long ALU reg-reg operation
4511 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4512 instruction_count(2);
4513 dst : S4(write);
4514 src : S3(read);
4515 DECODE : S0(2); // any 2 decoders
4516 ALU : S3(2); // both alus
4517 %}
4518
4519 // Integer ALU reg-reg operation
4520 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4521 single_instruction;
4522 dst : S4(write);
4523 src : S3(read);
4524 D0 : S0; // big decoder only
4525 ALU : S3; // any alu
4526 %}
4527
4528 // Long ALU reg-reg operation
4529 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4530 instruction_count(2);
4531 dst : S4(write);
4532 src : S3(read);
4533 D0 : S0(2); // big decoder only; twice
4534 ALU : S3(2); // both alus
4535 %}
4536
4537 // Integer ALU reg-mem operation
4538 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4539 single_instruction;
4540 dst : S5(write);
4541 mem : S3(read);
4542 D0 : S0; // big decoder only
4543 ALU : S4; // any alu
4544 MEM : S3; // any mem
4545 %}
4546
4547 // Long ALU reg-mem operation
4548 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4549 instruction_count(2);
4550 dst : S5(write);
4551 mem : S3(read);
4552 D0 : S0(2); // big decoder only; twice
4553 ALU : S4(2); // any 2 alus
4554 MEM : S3(2); // both mems
4555 %}
4556
4557 // Integer mem operation (prefetch)
4558 pipe_class ialu_mem(memory mem)
4559 %{
4560 single_instruction;
4561 mem : S3(read);
4562 D0 : S0; // big decoder only
4563 MEM : S3; // any mem
4564 %}
4565
4566 // Integer Store to Memory
4567 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4568 single_instruction;
4569 mem : S3(read);
4570 src : S5(read);
4571 D0 : S0; // big decoder only
4572 ALU : S4; // any alu
4573 MEM : S3;
4574 %}
4575
4576 // Long Store to Memory
4577 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4578 instruction_count(2);
4579 mem : S3(read);
4580 src : S5(read);
4581 D0 : S0(2); // big decoder only; twice
4582 ALU : S4(2); // any 2 alus
4583 MEM : S3(2); // Both mems
4584 %}
4585
4586 // Integer Store to Memory
4587 pipe_class ialu_mem_imm(memory mem) %{
4588 single_instruction;
4589 mem : S3(read);
4590 D0 : S0; // big decoder only
4591 ALU : S4; // any alu
4592 MEM : S3;
4593 %}
4594
4595 // Integer ALU0 reg-reg operation
4596 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4597 single_instruction;
4598 dst : S4(write);
4599 src : S3(read);
4600 D0 : S0; // Big decoder only
4601 ALU0 : S3; // only alu0
4602 %}
4603
4604 // Integer ALU0 reg-mem operation
4605 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4606 single_instruction;
4607 dst : S5(write);
4608 mem : S3(read);
4609 D0 : S0; // big decoder only
4610 ALU0 : S4; // ALU0 only
4611 MEM : S3; // any mem
4612 %}
4613
4614 // Integer ALU reg-reg operation
4615 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4616 single_instruction;
4617 cr : S4(write);
4618 src1 : S3(read);
4619 src2 : S3(read);
4620 DECODE : S0; // any decoder
4621 ALU : S3; // any alu
4622 %}
4623
4624 // Integer ALU reg-imm operation
4625 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4626 single_instruction;
4627 cr : S4(write);
4628 src1 : S3(read);
4629 DECODE : S0; // any decoder
4630 ALU : S3; // any alu
4631 %}
4632
4633 // Integer ALU reg-mem operation
4634 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4635 single_instruction;
4636 cr : S4(write);
4637 src1 : S3(read);
4638 src2 : S3(read);
4639 D0 : S0; // big decoder only
4640 ALU : S4; // any alu
4641 MEM : S3;
4642 %}
4643
4644 // Conditional move reg-reg
4645 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4646 instruction_count(4);
4647 y : S4(read);
4648 q : S3(read);
4649 p : S3(read);
4650 DECODE : S0(4); // any decoder
4651 %}
4652
4653 // Conditional move reg-reg
4654 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4655 single_instruction;
4656 dst : S4(write);
4657 src : S3(read);
4658 cr : S3(read);
4659 DECODE : S0; // any decoder
4660 %}
4661
4662 // Conditional move reg-mem
4663 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4664 single_instruction;
4665 dst : S4(write);
4666 src : S3(read);
4667 cr : S3(read);
4668 DECODE : S0; // any decoder
4669 MEM : S3;
4670 %}
4671
4672 // Conditional move reg-reg long
4673 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4674 single_instruction;
4675 dst : S4(write);
4676 src : S3(read);
4677 cr : S3(read);
4678 DECODE : S0(2); // any 2 decoders
4679 %}
4680
4681 // Conditional move double reg-reg
4682 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4683 single_instruction;
4684 dst : S4(write);
4685 src : S3(read);
4686 cr : S3(read);
4687 DECODE : S0; // any decoder
4688 %}
4689
4690 // Float reg-reg operation
4691 pipe_class fpu_reg(regDPR dst) %{
4692 instruction_count(2);
4693 dst : S3(read);
4694 DECODE : S0(2); // any 2 decoders
4695 FPU : S3;
4696 %}
4697
4698 // Float reg-reg operation
4699 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4700 instruction_count(2);
4701 dst : S4(write);
4702 src : S3(read);
4703 DECODE : S0(2); // any 2 decoders
4704 FPU : S3;
4705 %}
4706
4707 // Float reg-reg operation
4708 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4709 instruction_count(3);
4710 dst : S4(write);
4711 src1 : S3(read);
4712 src2 : S3(read);
4713 DECODE : S0(3); // any 3 decoders
4714 FPU : S3(2);
4715 %}
4716
4717 // Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}
4727
4728 // Float reg-reg operation
4729 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4730 instruction_count(4);
4731 dst : S4(write);
4732 src1 : S3(read);
4733 src2 : S3(read);
4734 src3 : S3(read);
4735 DECODE : S1(3); // any 3 decoders
4736 D0 : S0; // Big decoder only
4737 FPU : S3(2);
4738 MEM : S3;
4739 %}
4740
4741 // Float reg-mem operation
4742 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4743 instruction_count(2);
4744 dst : S5(write);
4745 mem : S3(read);
4746 D0 : S0; // big decoder only
4747 DECODE : S1; // any decoder for FPU POP
4748 FPU : S4;
4749 MEM : S3; // any mem
4750 %}
4751
4752 // Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any 2 decoders for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
4763
4764 // Float mem-reg operation
4765 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4766 instruction_count(2);
4767 src : S5(read);
4768 mem : S3(read);
4769 DECODE : S0; // any decoder for FPU PUSH
4770 D0 : S1; // big decoder only
4771 FPU : S4;
4772 MEM : S3; // any mem
4773 %}
4774
pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any 2 decoders for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}
4785
4786 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4787 instruction_count(3);
4788 src1 : S3(read);
4789 src2 : S3(read);
4790 mem : S4(read);
4791 DECODE : S0; // any decoder for FPU PUSH
4792 D0 : S0(2); // big decoder only
4793 FPU : S4;
4794 MEM : S3(2); // any mem
4795 %}
4796
4797 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4798 instruction_count(2);
4799 src1 : S3(read);
4800 dst : S4(read);
4801 D0 : S0(2); // big decoder only
4802 MEM : S3(2); // any mem
4803 %}
4804
4805 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4806 instruction_count(3);
4807 src1 : S3(read);
4808 src2 : S3(read);
4809 dst : S4(read);
4810 D0 : S0(3); // big decoder only
4811 FPU : S4;
4812 MEM : S3(3); // any mem
4813 %}
4814
4815 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4816 instruction_count(3);
4817 src1 : S4(read);
4818 mem : S4(read);
4819 DECODE : S0; // any decoder for FPU PUSH
4820 D0 : S0(2); // big decoder only
4821 FPU : S4;
4822 MEM : S3(2); // any mem
4823 %}
4824
4825 // Float load constant
4826 pipe_class fpu_reg_con(regDPR dst) %{
4827 instruction_count(2);
4828 dst : S5(write);
4829 D0 : S0; // big decoder only for the load
4830 DECODE : S1; // any decoder for FPU POP
4831 FPU : S4;
4832 MEM : S3; // any mem
4833 %}
4834
4835 // Float load constant
4836 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
4837 instruction_count(3);
4838 dst : S5(write);
4839 src : S3(read);
4840 D0 : S0; // big decoder only for the load
4841 DECODE : S1(2); // any decoder for FPU POP
4842 FPU : S4;
4843 MEM : S3; // any mem
4844 %}
4845
// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR   : S3;
%}

// Conditional branch: reads the condition codes before branching.
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr   : S1(read);
    BR   : S3;
%}

// Allocation idiom (compare-and-exchange): serializing, fixed 6-cycle latency.
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}

// Generic big/slow expanded idiom: catch-all for multi-bundle expansions;
// the 100-cycle latency is a deliberately pessimistic placeholder.
pipe_class pipe_slow( ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}
4889
4890 %}
4891
4892 //----------INSTRUCTIONS-------------------------------------------------------
4893 //
4894 // match -- States which machine-independent subtree may be replaced
4895 // by this instruction.
4896 // ins_cost -- The estimated cost of this instruction is used by instruction
4897 // selection to identify a minimum cost tree of machine
4898 // instructions that matches a tree of machine-independent
4899 // instructions.
4900 // format -- A string providing the disassembly for this instruction.
4901 // The value of an instruction's operand may be inserted
4902 // by referring to it with a '$' prefix.
4903 // opcode -- Three instruction opcodes may be provided. These are referred
4904 // to within an encode class as $primary, $secondary, and $tertiary
4905 // respectively. The primary opcode is commonly used to
4906 // indicate the type of machine instruction, while secondary
4907 // and tertiary are often used for prefix options or addressing
4908 // modes.
4909 // ins_encode -- A list of encode classes with parameters. The encode class
4910 // name must have been defined in an 'enc_class' specification
4911 // in the encode section of the architecture description.
4912
// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// These instructs exist only so the matcher can express moves between the
// general and "legacy"/"vl" register classes; they must never be emitted,
// hence the ShouldNotReachHere() in every encoding.

// Load Float
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}



// Load Double
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
4995
//----------BSWAP-Instruction--------------------------------------------------

// Reverse the bytes of a 32-bit int in place (BSWAP reg).
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP  $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Reverse the bytes of a 64-bit long held in a register pair: byte-swap each
// half, then exchange the halves (see the bswap_long_bytes encoding).
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP  $dst.lo\n\t"
            "BSWAP  $dst.hi\n\t"
            "XCHG   $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Reverse the bytes of an unsigned 16-bit value: BSWAP moves the two bytes
// into the high half; logical SHR brings them back down zero-extended.
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP  $dst\n\t"
            "SHR    $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Reverse the bytes of a signed 16-bit value: like the unsigned form but
// arithmetic SAR sign-extends the result.
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP  $dst\n\t"
            "SAR    $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}
5043
5044
//---------- Zeros Count Instructions ------------------------------------------

// Count leading zeros with the direct LZCNT instruction (guarded by the
// UseCountLeadingZerosInstruction flag).
instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback without LZCNT: BSR yields the index of the highest set bit (and is
// undefined on zero input, hence the JNZ/MOV -1 guard); NEG + ADD 31 then
// computes 31 - index, which for the forced -1 gives 32 on zero input.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ    skip\n\t"
            "MOV    $dst, -1\n"
      "skip:\n\t"
            "NEG    $dst\n\t"
            "ADD    $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

// Long (register pair) leading-zero count via LZCNT: LZCNT sets CF when its
// source is zero, so JNC skips the low-word step when the high word had a
// set bit; otherwise count the low word and add 32.
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC    done\n\t"
            "LZCNT  $dst, $src.lo\n\t"
            "ADD    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long leading-zero count without LZCNT: BSR the high word (add 32 to its
// index so the final 63 - index is right), fall back to the low word when the
// high word is zero, and force -1 when both are zero so the result is 64.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ     msw_is_zero\n\t"
            "ADD    $dst, 32\n\t"
            "JMP    not_zero\n"
      "msw_is_zero:\n\t"
            "BSR    $dst, $src.lo\n\t"
            "JNZ    not_zero\n\t"
            "MOV    $dst, -1\n"
      "not_zero:\n\t"
            "NEG    $dst\n\t"
            "ADD    $dst, 63\n" %}
 ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros with the direct TZCNT instruction.
instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback without TZCNT: BSF gives the index of the lowest set bit but is
// undefined on zero input, so substitute 32 in that case.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ    done\n\t"
            "MOV    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long trailing-zero count via TZCNT: TZCNT sets CF when its source is zero,
// so JNC skips the high-word step when the low word had a set bit.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC    done\n\t"
            "TZCNT  $dst, $src.hi\n\t"
            "ADD    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long trailing-zero count without TZCNT: BSF the low word; if zero, BSF the
// high word and add 32 (substituting 32 first when both words are zero, so an
// all-zero input yields 64).
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ    done\n\t"
            "BSF    $dst, $src.hi\n\t"
            "JNZ    msw_not_zero\n\t"
            "MOV    $dst, 32\n"
      "msw_not_zero:\n\t"
            "ADD    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}
5227
5228
//---------- Population Count Instructions -------------------------------------
// All forms are guarded by the UsePopCountInstruction flag and clobber the
// condition codes (POPCNT writes EFLAGS).

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// POPCNT directly from memory.
instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// 64-bit popcount on a 32-bit VM: count each half of the register pair and
// sum the counts.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Memory form: build addresses for the low word and the word at disp+4 by
// hand, count each, and sum.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}
5290
5291
5292 //----------Load/Store/Move Instructions---------------------------------------
5293 //----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register.
// dst.lo gets the sign-extended byte; dst.hi is derived from it by an
// arithmetic shift (7 suffices because bits 7..31 of dst.lo are already all
// sign bits after MOVSX8).
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register: zero-extend the
// byte into dst.lo and clear dst.hi (the XOR kills the flags, hence KILL cr).
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register.
// The mask is applied truncated to its low 8 bits: the loaded value is
// already zero-extended from a byte, so higher mask bits cannot match.
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}
5375
// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX  $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed).
// Matches the (x << 24) >> 24 idiom and folds it into a single
// sign-extending byte load.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register.
// dst.hi is derived from the sign-extended dst.lo by an arithmetic shift
// (15 suffices because bits 15..31 are already all sign bits after MOVSX).
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed).
// The (x << 24) >> 24 idiom makes the result a sign-extended low byte.
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register:
// zero-extend into dst.lo and clear dst.hi.
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long
// Register: the 0xFF mask is folded into a single byte-sized zero-extending
// load, so no AND is needed.
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long
// Register. The mask is applied truncated to its low 16 bits: the loaded
// value is already zero-extended from 16 bits.
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}
5495
// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed).
// Folds the (x << 24) >> 24 narrowing idiom into one sign-extending load.
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned).
// Folds the & 0xFF idiom into one zero-extending byte load.
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed).
// Folds the (x << 16) >> 16 narrowing idiom into one sign-extending load.
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned).
// Folds the & 0xFFFF idiom into one zero-extending word load.
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register: dst.hi is the sign of dst.lo (SAR 31).
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register: the mask is folded into a
// byte-sized zero-extending load; dst.hi is simply cleared.
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register: the mask is folded into
// a word-sized zero-extending load; dst.hi is simply cleared.
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register: a 31-bit (non-negative)
// mask guarantees the masked result is non-negative, so dst.hi can be
// cleared instead of sign-extended.
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register: ConvI2L followed by an AND with
// the 0xFFFFFFFF mask is just a zero-extension, so clear dst.hi.
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}
5640
// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI. Non-atomic form: two separate 32-bit loads from disp and
// disp+4.
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
            "MOV    $dst.hi,$mem+4" %}

  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side. (x87 path, UseSSE <= 1.)
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
            "FISTp  $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load via a 64-bit XMM move, result left in a stack
// slot (UseSSE >= 2).
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD  $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load via XMM, result split into an integer register
// pair: low word via MOVD, high word via PSRLQ 32 + MOVD.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD   $dst.lo,$tmp\n\t"
            "PSRLQ  $tmp,32\n\t"
            "MOVD   $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
5706
// Load Range (array length); plain 32-bit MOV (opcode 0x8B = MOV r32, r/m32).
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer; 32-bit MOV, same encoding as an int load.
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer; 32-bit MOV, same encoding as an int load.
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Double onto the x87 stack (UseSSE <= 1): FLD from memory, then FSTP
// into the allocated stack register.
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D  ST,$mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}
5754
// Load Double to XMM (MOVSD form, used when UseXmmLoadAndClearUpper clears
// the upper half of the register).
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD  $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Double to XMM when UseXmmLoadAndClearUpper is off; the assembler's
// movdbl emits the appropriate form for this flag setting (format shows
// MOVLPD, which leaves the upper half untouched).
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS  $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float onto the x87 stack (UseSSE == 0): FLD_S then FSTP.
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S  ST,$mem\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}
5804
// Load Effective Address (opcode 0x8D = LEA). One variant per addressing
// mode so the matcher can cover base+disp8, base+disp32, base+index+disp,
// base+index*scale, and base+index*scale+disp without materializing loads.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
  ins_pipe( ialu_reg_reg_fat );
%}
5855
// Load Constant
// Move a 32-bit integer immediate into a register.
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( SetInstMark, LdImmI(dst, src), ClearInstMark );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// XOR reg,reg is the shortest way to zero a register, but it writes
// EFLAGS, hence the KILL cr effect.
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33); /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

// Load a pointer constant (MOV r32,imm32; 0xB8 + register in low 3 bits).
instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8); /* + rd */
  ins_encode( SetInstMark, LdImmP(dst, src), ClearInstMark );
  ins_pipe( ialu_reg_fat );
%}

// Load a 64-bit long constant as two 32-bit immediate moves (lo/hi halves).
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load long zero: XOR both register halves (clobbers EFLAGS).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}
5907
// Floating-point constant loads.  The *FPR rules target the x87 stack
// (used when SSE is unavailable/disabled); the loadConF/loadConD rules
// target XMM registers.  Special constants 0.0 and 1.0 use dedicated
// x87 instructions (FLDZ/FLD1) or XORPS/XORPD instead of a constant-table
// load; which rule fires is controlled by the immediate-operand predicates.

// The instruction usage is guarded by predicate in operand immFPR().
// General float constant: load from the constant table, then pop into $dst.
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
// Float 0.0: FLDZ avoids a constant-table access.
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
// Float 1.0: FLD1 avoids a constant-table access.
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
// SSE float constant: MOVSS from the constant table into an XMM register.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// SSE float 0.0: xor the register with itself, no memory access.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
// General double constant via x87 constant-table load.
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
// Double 0.0 via FLDZ.
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
// Double 1.0 via FLD1.
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// SSE2 double constant: MOVSD from the constant table into an XMM register.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
// SSE2 double 0.0: xor the register with itself, no memory access.
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
6032
// Load Stack Slot
// Spill reloads: move values from JVM stack slots back into registers.

// Reload a 32-bit integer from a stack slot (MOV r32,r/m32 = 0x8B).
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Reload a long from a stack slot as two 32-bit moves (lo then hi word).
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
// Reload a pointer from a stack slot.
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
// Reload a float from a stack slot into an x87 register:
// FLD m32real (D9 /0) pushes onto the FPU stack, then pop into $dst.
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
// Reload a double from a stack slot into an x87 register (DD /0, FLD m64real).
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}
6091
// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// Exactly one of these rules applies, selected by UseSSE and the
// AllocatePrefetchInstr flag (0=NTA, 1=T0, 2=T2, 3=PREFETCHW).

// No SSE and not PREFETCHW: emit nothing (zero-size instruction).
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// PREFETCHW: prefetch with intent to write.
instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// PREFETCHNTA: non-temporal hint.
instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// PREFETCHT0: prefetch into all cache levels.
instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// PREFETCHT2: prefetch into L2 and up.
instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
6152
//----------Store Instructions-------------------------------------------------

// Store Byte
// MOV r/m8,r8 (0x88).  xRegI restricts the source to byte-addressable
// registers (EAX/EBX/ECX/EDX) since their 8-bit subregisters are needed.
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// 16-bit store: operand-size prefix (0x66) followed by MOV r/m32,r32 (0x89).
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( SetInstMark, OpcS, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
// Non-atomic path only: two 32-bit stores (lo word, then hi word at +4).
// Volatile/atomic long stores are handled by the rules below.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// Truncating store: only the low 32 bits of the long are written.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
6211
// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
// x87 path (UseSSE<=1): FILD/FISTP gives a single 64-bit memory store.
// The leading CMP against $mem triggers the implicit null check.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( SetInstMark, OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// SSE2 path, source already in a stack slot: bounce the 64-bit value
// through an XMM temp so the store to $mem is a single atomic MOVSD.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 path, source in a GPR pair: assemble the 64-bit value in an XMM
// register (MOVD lo, MOVD hi, PUNPCKLDQ) and store it atomically with MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
6264
// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
// MOV r/m32,imm32 (C7 /0): store a constant without using a register.
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
// Guarded by UseStoreImmI16: 16-bit immediate stores cause length-changing-
// prefix stalls on some CPUs, so this form is only used where it is cheap.
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SetInstMark, SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32( src ), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
// MOV r/m8,imm8 (C6 /0).
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}
6321
// Store Double
// x87 path: store the FPU top-of-stack double to memory (DD /2, FST m64real).
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
// Matching RoundDouble here lets the store itself provide the rounding,
// so no separate round instruction is needed.
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// Store Float
// x87 path: store the FPU top-of-stack as single precision (D9 /2).
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// The single-precision store performs the RoundFloat rounding itself.
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// Fused ConvD2F + StoreF: the single-precision store narrows the double.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
// Stores the float's raw 32-bit pattern with an integer immediate MOV.
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits(src), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}
6432
// Store Integer to stack slot
// Spill: write a 32-bit integer register into a stack slot.
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
// Spill: write a pointer register into a stack slot.
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
// Spill: write a long register pair as two 32-bit stores (lo, then hi at +4).
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}
6466
//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors
// On x86 (TSO memory model) only StoreLoad needs a real instruction;
// every other barrier flavor below compiles to nothing (size(0)) and
// exists only to constrain the compiler's own reordering.

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// StoreLoad barrier: the one x86 barrier with a real encoding.
// Emitted as a locked ADD to the stack, which is cheaper than MFENCE.
// Clobbers EFLAGS, hence KILL cr.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elide the StoreLoad barrier when a following operation already
// provides the ordering (see Matcher::post_store_load_barrier).
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
6548
//----------Move Instructions--------------------------------------------------

// CastX2P: reinterpret an integer as a pointer.  Both operands are pinned
// to EAX, so the "move" is a register-allocation identity and emits nothing.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

// CastP2X: reinterpret a pointer as an integer (plain register copy).
instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}
6565
//----------Conditional Move---------------------------------------------------
// Conditional move
// jmov* rules emulate CMOV with a short conditional jump around a MOV for
// CPUs without the CMOV instruction; cmov* rules use real CMOVcc (0F 4x).

instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Emulated CMOV, unsigned comparison flavor.
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Real CMOVcc (0F 40+cc), signed comparison.
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Real CMOVcc, unsigned comparison.
instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// UCF (unordered-compare flags) variant delegates to the unsigned rule.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
// CMOVcc with a memory source (fused LoadI).
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
// CMOVcc from memory, unsigned comparison.
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// UCF variant delegates to the unsigned memory rule.
instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
6661
// Conditional move
// Pointer CMOVcc, signed comparison.
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
// Pointer CMOVcc, unsigned comparison.
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// UCF variant delegates to the unsigned pointer rule.
instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
6734
// Conditional move
// x87 FCMOVcc (0xDA family), double, unsigned/unordered flags.
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
// x87 FCMOVcc, float, unsigned/unordered flags.
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed case: emulate with a conditional branch around an x87 reg copy.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
// XMM registers have no CMOV; branch around a MOVSS instead.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}
6800
// No CMOVE with SSE/SSE2
// XMM registers have no CMOV; branch around a MOVSD instead.
// Fix: this is the CMoveD (double) rule using MOVSD, but the debug
// format string annotated it "# float"; corrected to "# double".
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}
6818
// unsigned version
// Branch-around MOVSS for CMoveF with unsigned comparison flags.
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// UCF variant delegates to the unsigned float rule.
instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}
6845
// unsigned version
// Branch-around MOVSD for CMoveD with unsigned comparison flags.
// Fix: this is the CMoveD (double) rule using MOVSD, but the debug
// format string annotated it "# float"; corrected to "# double".
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}
6863
// UCF variant delegates to the unsigned double rule.
instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move: one CMOVcc per 32-bit half, same condition.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Long conditional move, unsigned comparison flavor.
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// UCF variant delegates to the unsigned long rule.
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
6903
6904 //----------Arithmetic Instructions--------------------------------------------
6905 //----------Addition Instructions----------------------------------------------
6906
// Integer Addition Instructions

// Register-register add: 2-byte ADD r32,r/m32 (opcode 0x03). Clobbers EFLAGS.
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Add immediate to register; OpcSErm/Con8or32 pick the 8-bit sign-extended
// form when the constant fits, otherwise the full 32-bit immediate.
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of constant 1 becomes the 1-byte INC r32 (0x40+reg) when UseIncDec
// allows it (INC leaves CF unchanged, which some CPUs penalize).
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: computes dst = src0 + imm without touching flags.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
  ins_pipe( ialu_reg_reg );
%}
6950
// Pointer + immediate via LEA: flags-preserving three-operand add.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
  ins_pipe( ialu_reg_reg );
%}

// Add of constant -1 becomes the 1-byte DEC r32 (0x48+reg), gated on UseIncDec.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Add integer offset to a pointer register (ADD r32,r/m32).
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Add immediate offset to a pointer register.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}
6994
// Add a loaded value into a register (ADD r32,m32).
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write: add a register into memory (ADD m32,r32).
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32(src), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// Increment memory by 1 in place (INC m32, FF /0).
instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,dst), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}

// Decrement memory by 1 in place (DEC m32, FF /1); matched on an add of -1.
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x01,dst), ClearInstMark);
  ins_pipe( ialu_mem_imm );
%}
7050
7051
// The Cast* nodes below exist only for the compiler's type system; they emit
// no machine code (empty encoding) and simply keep dst pinned in a register.

instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castLL( eRegL dst ) %{
  match(Set dst (CastLL dst));
  format %{ "#castLL of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// Float/double casts come in XMM (UseSSE) and x87 (pre-SSE) register flavors;
// the UseSSE predicates make exactly one variant available per configuration.
instruct castFF( regF dst ) %{
  predicate(UseSSE >= 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castDD( regD dst ) %{
  predicate(UseSSE >= 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castFF_PR( regFPR dst ) %{
  predicate(UseSSE < 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castDD_PR( regDPR dst ) %{
  predicate(UseSSE < 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}
7119
// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS via CMPXCHG8B: oldval pinned in EDX:EAX, newval in ECX:EBX,
// pointer in ESI. The boolean result is materialized from ZF afterwards.
// oldval is KILLed because CMPXCHG8B writes the current value into EDX:EAX
// on failure.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS: oldval pinned in EAX (implicit CMPXCHG operand).
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte CAS (CMPXCHGB).
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Short CAS (CMPXCHGW).
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int CAS (CMPXCHG).
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}
7189
// CompareAndExchange variants return the witnessed value rather than a
// boolean: CMPXCHG leaves it in oldval's fixed register(s), so oldval is
// both an input and the result (Set oldval ...) and no flag conversion is
// emitted.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}
7229
// GetAndAdd whose result is unused: a LOCK ADD suffices (no old value needed).
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Short GetAndAdd, result unused: LOCK ADD word.
instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDS [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Short GetAndAdd returning the old value: LOCK XADD word (old value lands
// in newval's register).
instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDS [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Int GetAndAdd, result unused: LOCK ADD dword.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Int GetAndAdd returning the old value: LOCK XADD dword.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}
7299
// GetAndSet: XCHG with a memory operand is implicitly locked on x86, so no
// LOCK prefix (and no flags KILL) is needed for these.

// Important to match to xRegI: only 8-bit regs.
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Pointer GetAndSet: same dword XCHG (32-bit pointers on this port).
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
7336
//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions

// Register-register subtract (SUB r32,r/m32, opcode 0x2B). Clobbers EFLAGS.
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract immediate; short (sign-extended imm8) or long form via Con8or32.
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract a loaded value from a register.
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write: subtract a register from memory (SUB m32,r32).
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
// Matched as AddP of a negated index: dst = dst + (0 - src) => SUB dst,src.
instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Negate: 0 - dst folds to the one-operand NEG (F7 /3).
instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}
7406
//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
// Two-operand signed multiply: IMUL r32,r/m32 (0F AF). Clobbers EFLAGS.
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
// Three-operand form: IMUL r32,r/m32,imm (69/6B chosen by OpcSE/Con8or32).
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into EAX only (the low half of EDX:EAX); used as
// the constant operand of the multiply-high patterns below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}
7445
// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Walk the matched subtree (_kids) to require that the multiplier is a
  // long constant whose value fits in a signed 32-bit int.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// Same pattern for shift counts 33..63: SAR of EDX by (cnt-32) follows.
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}
7477
// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( SetInstMark, OpcSE(imm), RegMem( dst, src ), Con8or32( imm ), ClearInstMark );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( SetInstMark, OpcS, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Fused multiply-add of shorts: dst += src1*? via two IMULs and an ADD,
// expanded from the existing single-instruction forms. src2 is KILLed
// because it holds the second product before the final add.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
            mulI_eReg(src2, src3, cr);
            addI_eReg(dst, src2, cr); %}
%}
7511
// Multiply Register Int to Long
// One-operand IMUL: EAX * src1 -> EDX:EAX, giving the full 64-bit product.
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned 32x32->64 multiply: one-operand MUL of EAX by src1 -> EDX:EAX.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}
7536
// Multiply Register Long
// Full 64x64 multiply on 32-bit: three 32-bit multiplies plus adds, with the
// cross terms that can't carry into the low word dropped.
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
7611
// Multiply Register Long by small constant
// Constant fits in 7 bits (immL_127), so hi(src) == 0 and one cross term drops.
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  // hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}
7627
// Integer DIV with Register
// cdq_enc also emits the guard for min_jint / -1, which would otherwise fault
// (#DE on IDIV overflow); that case short-circuits with quotient=min_jint.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// 64-bit divide has no hardware form on ia32; call into the runtime.
instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (DivL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}
7661
// Integer DIVMOD with Register, both quotient and mod results
// Single IDIV produces quotient in EAX and remainder in EDX simultaneously.
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
// Remainder comes back in EDX; EAX (quotient) is clobbered.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// As with divL_eReg, 64-bit remainder goes through a runtime call.
instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (ModL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}
7709
// Divide Register Long (no special case since divisor != -1)
// Long / 32-bit constant without a runtime call: divide by |imm| using
// unsigned DIV (fast path when the high word is already smaller than the
// divisor, i.e. the quotient fits in 32 bits), then fix up the sign at the
// end if imm was negative. Dividend/result live in EDX:EAX.
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;   // |imm|; safe since con != min_jint
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      // divisor was negative: negate the 64-bit quotient
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}
7780
// Remainder Register Long (remainder fit into 32 bits)
// Long % 32-bit constant: same unsigned-division scheme as divL_eReg_imm32,
// but the remainder (in EDX after the final DIV) is what's kept; the result's
// sign follows the dividend, produced by the trailing SAR of the high word.
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;   // |imm|; remainder ignores divisor sign
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    // falls through into Lfast for the final divide

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    // remainder is in EDX (the high word); sign-extend it into EDX:EAX
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}
7847
7848 // Integer Shift Instructions
// Shift Left by one (SHL r32,1 -- short D1 /4 form, 2 bytes)
instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate (SHL r32,imm8 -- C1 /4 ib, 3 bytes)
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable count; x86 requires the count in CL (D3 /4).
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one (SAR r32,1 -- short D1 /7 form)
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one, read-modify-write directly in memory.
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary,dst), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}
7906
7907 // Arithmetic Shift Right by 8-bit immediate
7908 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7909 match(Set dst (RShiftI dst shift));
7910 effect(KILL cr);
7911
7912 size(3);
7913 format %{ "SAR $dst,$shift" %}
7914 opcode(0xC1, 0x7); /* C1 /7 ib */
7915 ins_encode( RegOpcImm( dst, shift ) );
7916 ins_pipe( ialu_mem_imm );
7917 %}
7918
// Arithmetic Shift Right by 8-bit immediate, read-modify-write in memory.
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary, dst ), Con8or32(shift), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable count in CL (D3 /7).
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one (SHR r32,1 -- short D1 /5 form)
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate (SHR r32,imm8 -- C1 /5 ib)
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.  The shift pair
// is recognized and collapsed into a single sign-extending byte move.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode; collapsed into a
// single sign-extending word move.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable count in CL (D3 /5).
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}
8005
8006
8007 //----------Logical Instructions-----------------------------------------------
8008 //----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register (AND r32,r/m32 -- 23 /r)
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate (81 /4 id, or sign-extended 83 /4 ib via OpcSErm)
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory (folds the load into the AND)
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register (read-modify-write; 21 /r stores to memory)
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate (read-modify-write)
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2, matched from (src1 ^ -1) & src2.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// ANDN with the second operand loaded from memory.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from (0 - src) & src.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// BLSI with the operand loaded from memory (both LoadI nodes are the same load).
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to and including lowest set bit, from (src - 1) ^ src.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// BLSMSK with the operand loaded from memory.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, matched from (src - 1) & src.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// BLSR with the operand loaded from memory.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
8187
// Or Instructions
// Or Register with Register (OR r32,r/m32 -- 0B /r)
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer reinterpreted as an int (CastP2X is a no-op move).
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate (81 /1 id, or sign-extended 83 form via OpcSErm)
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory (folds the load into the OR)
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register (read-modify-write; 09 /r stores to memory)
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate (read-modify-write)
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand -- low-level rotate pieces used only via the 'expand' rules
// below (no match rule of their own).
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

// Rotate by variable count in CL; dst must avoid ECX (ncxRegI).
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once: (x << 1) | (x >>> 31); immI_M1 matches the -1
// constant C2 produces for the 31-bit ursh (masked to 0x1f).
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once: only when the two shift counts sum to 0 mod 32.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once: (x << s) | (x >>> (0 - s)).
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once: (x << s) | (x >>> (32 - s)).
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand -- low-level rotate-right pieces used only via 'expand' rules.
instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

// Rotate right by variable count in CL; dst must avoid ECX (ncxRegI).
instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once: (x >>> 1) | (x << 31).
instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once: only when the shift counts sum to 0 mod 32.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once: (x >>> s) | (x << (0 - s)).
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once: (x >>> s) | (x << (32 - s)).
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}
8394
// Xor Instructions
// Xor Register with Register (XOR r32,r/m32 -- 33 /r)
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1: strength-reduced to NOT, which also
// leaves the flags untouched (note: no KILL cr here).
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate (81 /6 id, or sign-extended 83 form via OpcSErm)
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06); /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory (folds the load into the XOR)
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( SetInstMark, OpcP, RegMem(dst, src), ClearInstMark );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register (read-modify-write; 31 /r stores to memory)
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate (read-modify-write)
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6); /* Opcode 81 /6 id */
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
  ins_pipe( ialu_mem_imm );
%}
8467
8468 //----------Convert Int to Boolean---------------------------------------------
8469
// Plain register copy; expand-only helper (no match rule) used by convI2B.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Branch-free int->boolean: with dst == src on entry, NEG sets CF iff
// src != 0, then ADC dst,src computes src + src + CF; for dst holding -src
// that yields src - src + CF = CF, i.e. 0 or 1.  Expand-only helper.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B on an int: copy then the NEG/ADC trick above.
instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer-to-int register copy; expand-only helper used by convP2B.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Branch-free pointer->boolean; same NEG/ADC idiom as ci2b.
instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B on a pointer: copy then the NEG/ADC trick.
instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}
8521
8522 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8523 match(Set dst (CmpLTMask p q));
8524 effect(KILL cr);
8525 ins_cost(400);
8526
8527 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
8528 format %{ "XOR $dst,$dst\n\t"
8529 "CMP $p,$q\n\t"
8530 "SETlt $dst\n\t"
8531 "NEG $dst" %}
8532 ins_encode %{
8533 Register Rp = $p$$Register;
8534 Register Rq = $q$$Register;
8535 Register Rd = $dst$$Register;
8536 Label done;
8537 __ xorl(Rd, Rd);
8538 __ cmpl(Rp, Rq);
8539 __ setb(Assembler::less, Rd);
8540 __ negl(Rd);
8541 %}
8542
8543 ins_pipe(pipe_slow);
8544 %}
8545
// CmpLTMask against zero: the sign bit already encodes (dst < 0), so a
// single arithmetic shift by 31 smears it into a full 0/-1 mask.
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// p = (p - q) + (((p - q) < 0) ? y : 0), i.e. conditionally add y when the
// subtraction went negative -- implemented with a short branch instead of
// materializing the mask.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);  // p - q >= 0: mask is 0, skip add
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// y = (p < q) ? y : 0 -- keep y when the mask would be all-ones, otherwise
// clear it; again a short branch instead of a materialized mask.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);          // p < q: mask is -1, y unchanged
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}
8605
8606 /* If I enable this, I encourage spilling in the inner loop of compress.
8607 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8608 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8609 */
8610 //----------Overflow Math Instructions-----------------------------------------
8611
// Overflow-checking add: performs the ADD so OF is set for the following
// branch; op1 is clobbered (USE_KILL).
instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow-checking add with an immediate operand.
instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow-checking subtract: CMP sets the same flags as SUB without
// clobbering op1, so no USE_KILL is needed here.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow-checking subtract with an immediate operand.
instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow-checking negate (0 - op2): NEG sets OF when op2 == min_jint.
instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow-checking multiply: IMUL sets OF on signed overflow.
instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Overflow-checking multiply by immediate; three-operand IMUL writes the
// product into a scratch register so op1 survives.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Integer Absolute Instructions
// Branch-free abs: tmp = src >> 31 (0 or -1), then (src ^ tmp) - tmp
// negates exactly when src is negative.  Note abs(min_jint) == min_jint,
// matching Java semantics.
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
  %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);
    __ subl($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
8717
8718 //----------Long Instructions------------------------------------------------
// Add Long Register with Register: 64-bit add as ADD on the low words
// followed by ADC (add-with-carry) on the high words.
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate (ADD lo / ADC hi with split constant)
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory (low word at $mem, high word at $mem+4)
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register: SUB on the low words, then SBB
// (subtract-with-borrow) on the high words.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate (SUB lo / SBB hi)
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory (low word at $mem, high at $mem+4)
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate Long (0 - dst): NEG both halves, then SBB 0 from the high half to
// propagate the borrow from the low word.
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register: AND applied to both halves independently
// (no carry, unlike add/sub).
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory (low word at $mem, high at $mem+4)
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}
8831
// BMI1 instructions
// 64-bit ANDN (x = ~src1 & src2) emulated as two 32-bit ANDN ops, one per
// register half.  The matcher recognizes (src1 ^ -1) & src2 as AND-NOT.
// ANDN writes flags, hence KILL cr; TEMP dst keeps dst disjoint from inputs
// so the low-half write cannot clobber a high half still to be read.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
         %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

// Same as above with the second operand loaded from memory; the high half
// is addressed at $src2 displacement + 4 (little-endian long layout).
instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
         %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // Build the address of the high 32 bits by adding 4 to the displacement.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// 64-bit BLSI (isolate lowest set bit: x & -x) on 32-bit halves.
// The matcher recognizes (0 - src) & src.  If the low-half BLSI result is
// non-zero the lowest set bit was in the low word and the high half stays
// the 0 written up front; otherwise the bit lives in the high word and a
// second BLSI computes it there.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Memory-source form of the 64-bit BLSI above; both halves of the long are
// re-loaded from memory ($src and $src+4).
instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the high 32 bits of the in-memory long.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}
8924
// 64-bit BLSMSK (mask up to and including lowest set bit: x ^ (x - 1)) on
// 32-bit halves.  The matcher recognizes (src + (-1)) ^ src.  BLSMSK sets
// CF when its source operand is zero; carryClear after the low-half op
// means the mask ended in the low word, so the 0 pre-written into $dst.hi
// is already correct.  Otherwise the mask extends into the high word and a
// second BLSMSK computes it.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

// Memory-source form of the 64-bit BLSMSK above ($src and $src+4).
instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the high 32 bits of the in-memory long.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// 64-bit BLSR (reset lowest set bit: x & (x - 1)) on 32-bit halves.
// The matcher recognizes (src + (-1)) & src.  BLSR sets CF when its source
// is zero; carryClear after the low-half op means the cleared bit was in
// the low word, so the high half copied from $src.hi up front is already
// correct.  Otherwise (src.lo == 0) the borrow propagates and the bit to
// clear is in the high word.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

// Memory-source form of the 64-bit BLSR above ($src and $src+4).
instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the high 32 bits of the in-memory long.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}
9035
// Or Long Register with Register
// 64-bit OR as two 32-bit ORs (0x0B = OR r32, r/m32), one per half.
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
// Both halves use the 0x81 group-1 form with /1 (OR) as the ModRM reg field.
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
// Low half ORs from $mem, high half from $mem+4 (RegMem_Hi).
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
// 64-bit XOR as two 32-bit XORs (0x33 = XOR r32, r/m32).
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// XOR with all-ones is bitwise NOT; NOT does not write EFLAGS, so no
// KILL cr is needed here.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
// Both halves use the 0x81 group-1 form with /6 (XOR).
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
// Low half XORs from $mem, high half from $mem+4.
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
  ins_pipe( ialu_reg_long_mem );
%}
9115
// Shift Left Long by 1
// A 64-bit left shift by one is a 64-bit add of the value to itself:
// ADD doubles the low half and ADC folds the carry into the high half.
// Guarded by UseNewLongLShift (cheaper than SHLD/SHL on some CPUs).
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
// Two repetitions of the ADD/ADC doubling sequence above.
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
// Three repetitions of the ADD/ADC doubling sequence.
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}
9172
// Shift Left Long by 1-31
// SHLD shifts bits from the low half into the high half, then SHL
// finishes the low half.  Valid only for counts that keep bits within
// a 32-bit half boundary crossing (1-31).
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
// Counts >= 32: the low half moves wholesale into the high half, the
// remainder (cnt-32) is shifted there, and the low half becomes zero.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
// Runtime count in ECX: test bit 5 to decide whether the halves must be
// swapped (count >= 32) before the SHLD/SHL pair; SHLD/SHL themselves use
// only the low 5 bits of CL.
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
// Unsigned (logical) right shift: SHRD pulls high-half bits into the low
// half, then SHR zero-fills the high half.
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
// Counts >= 32: high half moves into the low half, shifts by cnt-32,
// and the high half is cleared.
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
// Logical right shift with runtime count in ECX; bit 5 of the count
// selects the half-swap path, mirroring salL_eReg_CL.
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
// Arithmetic right shift: SHRD on the low half, then SAR to replicate
// the sign bit through the high half.
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
// Counts >= 32: high half moves into the low half and shifts by cnt-32;
// SAR by 31 fills the high half with copies of the sign bit.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
// Arithmetic right shift with runtime count in ECX; on the >=32 path the
// high half is sign-filled (SAR 31) instead of zeroed.
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9295
9296
//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
// FUCOMIP writes EFLAGS directly (P6+ only, hence the supports_cmov
// predicate).  The fixup after JNP forces CF on an unordered (NaN)
// result so it is treated as "less than"; AH/SAHF is why EAX is killed.
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Variant for an eFlagsRegUCF consumer: no NaN fixup is needed, so it is
// just FLD + FUCOMIP.
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
// Pre-P6 fallback: FCOMP sets the FPU status word, which is pulled into
// AX (FNSTSW) and transferred to EFLAGS via SAHF; unordered is folded
// into "less than" by forcing AH first.
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
// FTST compares ST(0) against +0.0; CmpF_Result materializes the
// three-way -1/0/1 integer result in $dst.
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
// Full three-way x87 compare; CmpF_Result turns the status flags into
// the -1/0/1 integer in $dst.
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
9380
// float compare and set condition codes in EFLAGS by XMM regs
// UCOMISD sets ZF/PF/CF; emit_cmpfp_fixup rewrites EFLAGS on the
// parity (unordered/NaN) path so NaN compares as "below".
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe( pipe_slow );
%}

// eFlagsRegUCF consumer: NaN handling not required, bare UCOMISD.
instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpD_cc, with the same NaN fixup.
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand form for an eFlagsRegUCF consumer (no fixup).
instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
// Three-way compare: emit_cmpfp3 converts UCOMISD's flags into the
// -1/0/1 integer convention (unordered counts as -1).
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
// Memory-operand form of the three-way compare above.
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
9478
9479
// x87 double subtract: push $src, then FSUBP pops it into $dst.
instruct subDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Subtract with an explicit store-and-round to a stack slot, used when
// the ideal graph demands a RoundDouble of the difference.
instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DSUB ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Subtract with the subtrahend loaded from memory (FLD mem, FSUBP dst).
instruct subDPR_reg_mem(regDPR dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// FABS operates on ST(0) only, so both operands are pinned to regDPR1.
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// FCHS (negate) likewise operates in place on ST(0).
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
9540
// x87 double add: push $src, then FADDP pops it into $dst.
instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Add with an explicit store-and-round to a stack slot (RoundDouble).
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DADD ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Add with the second operand loaded from memory.
instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DADDp $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory
// Read-modify-write form: load the double from memory, add the register,
// and store the rounded result back to the same location.
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D $dst\n\t"
            "DADD ST,$src\n\t"
            "FST_D $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( SetInstMark, Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src), ClearInstMark,
              SetInstMark,
              Opcode(0xDD), RMopc_Mem(0x03,dst),
              ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}

// Adding the constant 1.0 uses FLD1 instead of a constant-table load.
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// General double constant add via the constant table.  The predicate
// excludes 0.0 and 1.0, which have cheaper dedicated forms.
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// Constant add with an explicit store-and-round to a stack slot; the
// predicate reaches through the RoundDouble to test the AddD's constant.
instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
9641
// x87 double multiply: push $src, then FMULP pops it into $dst.
instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (MulD dst src));
  ins_cost(1); // Select this instruction for all FP double multiplies

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Multiply by a double constant from the constant table; 0.0 and 1.0 are
// excluded by the predicate (they have cheaper forms elsewhere).
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}


// Multiply with the second operand loaded from memory.
instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
// Three-operand form: load from memory, multiply by $src, pop into $dst.
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_reg_mem );
%}


// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
// Fused multiply-subtract macro; result is written back into src2 via
// FSUBRP (DE E0+i encoding).
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}
9756
9757
// x87 double divide: push $src, then FDIVP pops it into $dst.
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
9770
// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // Was declared with two predicate() statements ("UseSSE<=1" and the
  // form below); keep only the stronger one, matching strictfp_mulDPR_reg.
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all strict FP double divides
               // (was the octal-looking literal "01"; same value, clearer)

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}
9798
// x87 double remainder via the FPREM loop in emitModDPR(); the helper
// clobbers EAX (FNSTSW AX) and EFLAGS (SAHF), hence the KILLs.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE2 double remainder: ModD has no SSE equivalent, so the operands are
// bounced through the stack into the x87 unit, FPREM iterates until the
// partial remainder converges (C2 clear), and the result is moved back to
// an XMM register; the extra FSTP restores x87 stack balance.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
          "\tMOVSD [ESP+0],$src1\n"
          "\tFLD_D [ESP+0]\n"
          "\tMOVSD [ESP+0],$src0\n"
          "\tFLD_D [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD $dst,[ESP+0]\n"
          "\tADD ESP,8\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

// x87 FPATAN (D9 F3): two-argument arctangent on the FPU stack.
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

// SSE2 variant: operands are moved through the stack to the x87 unit for
// FPATAN and back; the stack adjustment is why EFLAGS is killed.
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

// x87 FSQRT (D9 FA): square root of ST(0), popped into $dst.
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}
9868
9869 //-------------Float Instructions-------------------------------
9870 // Float Math
9871
9872 // Code for float compare:
9873 // fcompp();
9874 // fwait(); fnstsw_ax();
9875 // sahf();
9876 // movl(dst, unordered_result);
9877 // jcc(Assembler::parity, exit);
9878 // movl(dst, less_result);
9879 // jcc(Assembler::below, exit);
9880 // movl(dst, equal_result);
9881 // jcc(Assembler::equal, exit);
9882 // movl(dst, greater_result);
9883 // exit:
9884
9885 // P6 version of float compare, sets condition codes in EFLAGS
9886 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9887 predicate(VM_Version::supports_cmov() && UseSSE == 0);
9888 match(Set cr (CmpF src1 src2));
9889 effect(KILL rax);
9890 ins_cost(150);
9891 format %{ "FLD $src1\n\t"
9892 "FUCOMIP ST,$src2 // P6 instruction\n\t"
9893 "JNP exit\n\t"
9894 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
9895 "SAHF\n"
9896 "exit:\tNOP // avoid branch to branch" %}
9897 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9898 ins_encode( Push_Reg_DPR(src1),
9899 OpcP, RegOpc(src2),
9900 cmpF_P6_fixup );
9901 ins_pipe( pipe_slow );
9902 %}
9903
// Same as cmpFPR_cc_P6 but for a carry-flag-only consumer (eFlagsRegUCF):
// no NaN fixup is emitted, so EAX is untouched and the cost is lower.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}
9915
9916
9917 // Compare & branch
// Pre-P6 float compare (no FUCOMIP available): FCOMP, then copy the FPU
// status word to EFLAGS via FNSTSW AX / SAHF.  The 0x400 test checks the
// unordered condition bit; when set (NaN), AH is forced to 1 so the
// comparison reads as "less than".  EAX is clobbered by FNSTSW.
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}
9936
9937 // Compare vs zero into -1,0,1
// Three-way compare of a float against +0.0 using FTST (D9 E4), which
// tests ST(0) against zero.  CmpF_Result materializes -1/0/1 into dst;
// EAX and EFLAGS are killed by that result sequence.
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
9950
9951 // Compare into -1,0,1
// Three-way float compare (CmpF3): FCOMP src2 against pushed src1, then
// CmpF_Result turns the FPU condition codes into -1/0/1 in dst.  EAX and
// EFLAGS are killed by the result sequence.
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
9964
9965 // float compare and set condition codes in EFLAGS by XMM regs
// SSE float compare setting EFLAGS: UCOMISS, then emit_cmpfp_fixup patches
// the unordered (PF=1, NaN) outcome by rewriting the saved flags on the
// stack so that NaN reads as "less than" (CF set).
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe( pipe_slow );
%}
9982
// Same compare for a carry-flag-only consumer (eFlagsRegUCF): a bare
// UCOMISS suffices, no NaN fixup needed, hence the lower cost.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
9993
9994 // float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpF_cc: UCOMISS with the right operand folded
// from memory (LoadF subsumed), plus the same NaN fixup.
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe( pipe_slow );
%}
10011
// Memory-operand form for a carry-flag-only consumer: bare UCOMISS from
// memory, no NaN fixup.
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}
10022
10023 // Compare into -1,0,1 in XMM
// Three-way float compare (CmpF3) in XMM: UCOMISS, then emit_cmpfp3
// materializes -1/0/1 in dst (NaN/unordered and below both yield -1, per
// the JP/JB fallthrough in the format).  EFLAGS is killed.
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10042
10043 // Compare into -1,0,1 in XMM and memory
// Memory-operand form of cmpF_reg: same -1/0/1 materialization with the
// right operand folded from memory.
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10062
10063 // Spill to obtain 24-bit precision
// FSUB with the result popped to a stack slot (Pop_Mem_FPR): the store
// through memory is what rounds the extended-precision x87 result down to
// 24-bit (single) precision.
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
10075 //
10076 // This instruction does not round to 24-bits
// Two-address FSUB: dst is both input and output.  Pushes src, then uses
// the reverse-subtract-and-pop form (DE /5) into dst's stack register.
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10087
10088 // Spill to obtain 24-bit precision
// FADD with the result popped to a stack slot to force 24-bit rounding.
// src2 is pushed first; AddF is commutative so the operand order is safe.
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
10100 //
10101 // This instruction does not round to 24-bits
// Two-address FADD: push src, then FADDP (DE C0+i) accumulates into dst's
// stack register and pops.  No 24-bit rounding.
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10113
// FABS (D9 E1) takes no operands — it negates the sign of ST(0) in place,
// which is why dst and src are both constrained to the regFPR1 class.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
10123
// FCHS (D9 E0) flips the sign of ST(0) in place; like FABS it has no
// operands, so dst/src are pinned to the regFPR1 class.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
10133
10134 // Cisc-alternate to addFPR_reg
10135 // Spill to obtain 24-bit precision
// Load-folding FADD with 24-bit rounding: FLD from memory (tertiary D9 /0),
// add src1, pop to the stack slot.  SetInstMark/ClearInstMark bracket the
// memory-operand encoding for relocation bookkeeping.
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}
10149 //
10150 // Cisc-alternate to addFPR_reg
10151 // This instruction does not round to 24-bits
// Load-folding two-address FADD, no rounding: FLD from memory, then
// FADDP into dst's stack register.
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}
10162
// Following two instructions for _222_mpegaudio
10164 // Spill to obtain 24-bit precision
// Mirror of addFPR24_reg_mem with the memory operand on the left of the
// AddF (no LoadF wrapper in the match): load src1 from memory, add src2,
// pop to the stack slot with 24-bit rounding.
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}
10176
10177 // Cisc-spill variant
10178 // Spill to obtain 24-bit precision
// Cisc-spill variant: both operands come from memory — FLD src2, then
// a memory-form FADD (D8 /0) of src1, pop to the stack slot (24-bit round).
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark);
  ins_pipe( fpu_mem_mem_mem );
%}
10191
10192 // Spill to obtain 24-bit precision
// Memory/memory add (no explicit LoadF in the match); the encoding is the
// same FLD + memory-form FADD + spill sequence as the cisc variant above.
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark);
  ins_pipe( fpu_mem_mem_mem );
%}
10205
10206
10207 // Spill to obtain 24-bit precision
// Add a float constant from the constant table, spilling the result to the
// dst stack slot (fstp_s performs the 24-bit round).
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
10221 //
10222 // This instruction does not round to 24-bits
// Add a float constant from the constant table, popping the result into an
// FPU register (fstp_d to ST(i)) — no 24-bit rounding in this variant.
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}
10236
10237 // Spill to obtain 24-bit precision
// FMUL with the result popped to a stack slot to force 24-bit rounding.
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
10251 //
10252 // This instruction does not round to 24-bits
// Non-rounding FMUL: the result is popped into an FPU register
// (Pop_Reg_FPR), so the format shows a plain FSTP — the previous "FSTP_S"
// text wrongly suggested the single-precision memory spill used by the
// 24-bit variant above.  The format string is display-only; the encoding
// is unchanged.  Note the encoding pushes src2 and multiplies by src1,
// the reverse of the format text — MulF is commutative, so the result is
// the same.
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}
10266
10267
10268 // Spill to obtain 24-bit precision
10269 // Cisc-alternate to reg-reg multiply
// Load-folding FMUL with 24-bit rounding: FLD_S from memory, multiply by
// src1, pop to the stack slot.
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}
10283 //
10284 // This instruction does not round to 24-bits
10285 // Cisc-alternate to reg-reg multiply
// Load-folding FMUL, no rounding: FLD from memory, multiply by src1, pop
// the product into the dst FPU register.
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_reg_mem );
%}
10297
10298 // Spill to obtain 24-bit precision
// Memory/memory multiply with 24-bit rounding: FLD src2, memory-form FMUL
// (D8 /1) of src1, pop to the stack slot.
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark );
  ins_pipe( fpu_mem_mem_mem );
%}
10311
10312 // Spill to obtain 24-bit precision
// Multiply by a float constant from the constant table, spilling the
// result to the dst stack slot (fstp_s performs the 24-bit round).
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
10327 //
10328 // This instruction does not round to 24-bits
// Multiply by a float constant from the constant table, popping the result
// into an FPU register — no 24-bit rounding in this variant.
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}
10343
10344
10345 //
10346 // MACRO1 -- subsume unshared load into mulFPR
10347 // This instruction does not round to 24-bits
// MACRO1: multiply with the left operand's unshared load subsumed — only
// matches (MulF (LoadF mem1) src), i.e. the load feeds nothing else.
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_reg_mem );
%}
10361 //
10362 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10363 // This instruction does not round to 24-bits
// MACRO2: fused load+multiply+add — matches the whole expression tree
// (AddF (MulF (LoadF mem1) src1) src2) in one instruct.  The low cost (95)
// makes the matcher prefer this fusion over separate instructions.
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem_reg_reg );
%}
10380
10381 // MACRO3 -- addFPR a mulFPR
10382 // This instruction does not round to 24-bits. It is a '2-address'
10383 // instruction in that the result goes back to src2. This eliminates
10384 // a move from the macro; possibly the register allocator will have
10385 // to add it back (and maybe not).
// MACRO3: fused multiply-add written back into src2 (2-address form) —
// FLD src0, multiply by src1, FADDP accumulates into src2 and pops.
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}
10399
10400 // MACRO4 -- divFPR subFPR
10401 // This instruction does not round to 24-bits
// MACRO4: fused (src2 - src1) / src3 — FLD src2, subtract src1, divide by
// src3, pop the quotient into dst.
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}
10416
10417 // Spill to obtain 24-bit precision
// FDIV with the result popped to a stack slot to force 24-bit rounding.
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
10429 //
10430 // This instruction does not round to 24-bits
// Two-address FDIV: dst is both input and output; push src, then the
// reverse-divide-and-pop form (DE /7) into dst's stack register.
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10441
10442
10443 // Spill to obtain 24-bit precision
// Float remainder via the shared FPREM helper (emitModDPR), result spilled
// to a stack slot for 24-bit rounding.  The helper loops on FPREM until
// the reduction is complete, which needs EAX and EFLAGS.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
10456 //
10457 // This instruction does not round to 24-bits
// Two-address float remainder (dst = dst % src) via emitModDPR; result
// popped back into the dst FPU register, no 24-bit rounding.
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}
10470
// SSE float remainder: XMM has no remainder instruction, so both operands
// are staged through the stack onto the x87 stack, FPREM loops until the
// partial-remainder reduction completes (the JP loop on the status word),
// and the result is moved back into the XMM dst.  The final FSTP pops the
// leftover divisor to restore the FPU stack.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}
10494
10495
10496 //----------Arithmetic Conversion Instructions---------------------------------
10497 // The conversions operations are all Alpha sorted. Please keep it that way!
10498
// Round an x87 value to single precision by storing it to a float stack
// slot (FST_S truncates the extended-precision value to 24-bit mantissa).
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
10507
// Round an x87 value to double precision by storing it to a double stack
// slot (FST_D narrows the 80-bit value to 64-bit).
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
10516
10517 // Force rounding to 24-bit precision and 6-bit exponent
// D2F on the pure-x87 path: simply expand to the single-precision store
// that rounds through a stack slot.
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}
10526
10527 // Force rounding to 24-bit precision and 6-bit exponent
// D2F when the source is x87 but the float result lives in XMM (UseSSE==1):
// round through a 4-byte scratch slot on the stack, then MOVSS into dst.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // If src is not already top-of-stack, copy it up with FLD and pop the
    // copy; if it is ST(0), a non-popping FST_S suffices.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}
10549
10550 // Force rounding double precision to single precision
// D2F entirely in XMM (SSE2+): one CVTSD2SS does the rounding.
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
10560
// F2D on the pure-x87 path: a register-to-register move suffices since
// x87 registers hold full extended precision.
// NOTE(review): the format text says FST_S, but the encoding
// (Pop_Reg_Reg_DPR) is a register move, not a single-precision store —
// display text only; confirm against the encoding class definition.
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}
10568
// F2D with an x87 source under UseSSE==1: expand to the double-precision
// store through a stack slot.
instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}
10577
// F2D from an XMM float to an x87 double (UseSSE==1): stage the float
// through a 4-byte stack slot, FLD_S it onto the x87 stack, pop into dst.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}
10596
// F2D entirely in XMM (SSE2+): one CVTSS2SD.
instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
10606
10607 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// x87 D2I: switch the FPU to truncating rounding, FISTP through the stack,
// restore the control word, then check for the 0x80000000 sentinel that
// FISTP produces on overflow/NaN — if seen, call the d2i_wrapper stub to
// apply Java's corner-case semantics.  Result is pinned to EAX; EDX is a
// scratch temp.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}
10626
10627 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// SSE2 D2I: CVTTSD2SI truncates directly; it yields 0x80000000 on
// overflow/NaN, in which case the double is reloaded onto the x87 stack
// (the wrapper's calling convention) and d2i_wrapper applies Java's
// corner-case semantics.  Result is pinned to EAX; EDX is scratch.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: pass the original double to the wrapper on the x87 stack.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}
10656
// x87 D2L: 64-bit FISTP through the stack in truncating mode; the
// sentinel for overflow/NaN is min_jlong (EDX==0x80000000 && EAX==0), in
// which case d2l_wrapper is called.  Result is pinned to EDX:EAX.
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
10678
10679 // XMM lacks a float/double->long conversion, so use the old FPU stack.
// SSE2 D2L: there is no XMM double->long instruction on 32-bit x86, so the
// value is moved to the x87 stack and converted with a 64-bit FISTP in
// truncating mode.  The min_jlong sentinel (EDX==0x80000000 && EAX==0)
// signals overflow/NaN and routes to d2l_wrapper.  Result in EDX:EAX.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    // Truncating rounding for Java semantics, then store the 64-bit int.
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Sentinel seen: hand the original double to d2l_wrapper on the x87 stack.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}
10732
10733 // Convert a double to an int. Java semantics require we do complex
10734 // manglations in the corner cases. So we set the rounding mode to
10735 // 'zero', store the darned double down as an int, and reset the
10736 // rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
// x87 F2I: same scheme as convDPR2I_reg_reg — truncating FISTP, then the
// 0x80000000 sentinel check routes overflow/NaN to d2i_wrapper.  The
// shared DPR2I encoding works unchanged for the float case.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}
10758
10759 // Convert a float in xmm to an int reg.
// SSE F2I: CVTTSS2SI truncates directly; 0x80000000 signals overflow/NaN,
// in which case the float is reloaded onto the x87 stack and d2i_wrapper
// applies Java's corner-case semantics.  Result pinned to EAX, EDX scratch.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: pass the original float to the wrapper on the x87 stack.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}
10788
// x87 F2L: 64-bit truncating FISTP; the min_jlong sentinel
// (EDX==0x80000000 && EAX==0) routes overflow/NaN to d2l_wrapper.  The
// shared DPR2L encoding works unchanged for the float case.  Result in
// EDX:EAX.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
10811
10812 // XMM lacks a float/double->long conversion, so use the old FPU stack.
// SSE F2L: no XMM float->long instruction on 32-bit x86, so the float is
// staged through the stack to x87 and converted with a 64-bit truncating
// FISTP.  The min_jlong sentinel (EDX==0x80000000 && EAX==0) routes
// overflow/NaN to d2l_wrapper.  Result in EDX:EAX.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    // Truncating rounding for Java semantics, then store the 64-bit int.
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Sentinel seen: hand the original float to d2l_wrapper on the x87 stack.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}
10865
// Int -> double on the x87 stack (no SSE2): FILD the stack slot, result
// stays in an FPU register.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}
10875
// Int -> double with scalar SSE2 CVTSI2SD (when UseXmmI2D is off).
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10885
// Int -> double directly from memory (folds the LoadI into CVTSI2SD).
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
10895
// Int -> double via MOVD + packed CVTDQ2PD; alternative encoding selected
// by the UseXmmI2D flag.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD  $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10909
// Int -> double from memory on the x87 stack; only valid outside 24-bit
// precision mode (no explicit rounding step is emitted).
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}
10920
10921 // Convert a byte to a float; no rounding step needed.
// The predicate matches only (AndI x 255), i.e. a zero-extended byte value,
// which is exactly representable in a 24-bit mantissa — hence no rounding.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}
10932
10933 // In 24-bit mode, force exponent rounding by storing back out
// 24-bit mode: store the result to a 32-bit stack slot (FSTP_S) so the
// value is rounded to single precision including the exponent.
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}
10945
10946 // In 24-bit mode, force exponent rounding by storing back out
// 24-bit mode, memory-operand form: FILD straight from memory, then force
// rounding by storing back out as single precision.
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst), ClearInstMark);
  ins_pipe( fpu_mem_mem );
%}
10958
10959 // This instruction does not round to 24-bits
// Non-24-bit mode: int -> float on the x87 stack, no rounding store needed.
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}
10970
10971 // This instruction does not round to 24-bits
// Non-24-bit mode, memory-operand form of int -> float on the x87 stack.
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}
10982
10983 // Convert an int to a float in xmm; no rounding step needed.
// Int -> float with scalar CVTSI2SS (SSE1, or SSE2 with UseXmmI2F off).
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F ));
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10993
// Int -> float via MOVD + packed CVTDQ2PS; alternative encoding selected
// by the UseXmmI2F flag.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD  $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11007
// Sign-extend int to long: copy into both halves, then arithmetic-shift
// the high half by 31 to replicate the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}
11018
11019 // Zero-extend convert int to long
// Zero-extend convert int to long: the (AndL (ConvI2L x) 0xFFFFFFFF)
// pattern collapses to a copy plus clearing the high half.
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}
11030
11031 // Zero-extend long
// Zero-extend long: (AndL x 0xFFFFFFFF) keeps the low half and XORs the
// high half with itself.
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}
11042
// Long -> double on the x87 stack: push both halves, FILD the 64-bit
// integer, and round into the destination stack slot via FSTP_D.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}
11056
// Long -> double for SSE2: FILD the pushed long on the x87 side, store the
// rounded double back to the stack, then MOVSD it into the XMM register.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}
11071
// Long -> float for SSE: same x87 round-trip as convL2D_reg but stores a
// single-precision value (FSTP_S) and loads it with MOVSS.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}
11086
// Long -> float via the x87 stack into a float stack slot (F-rounded by
// FSTP_S). NOTE(review): no predicate — presumably selected by the
// stackSlotF destination when the SSE variants don't apply; confirm.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
11099
// Long -> int is just a copy of the low 32 bits.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}
11107
// Reinterpret float bits as int: plain 32-bit load from the stack slot
// (bit-pattern move, no conversion).
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}
11118
// MoveF2I from an x87 register: store the float bits to the int stack slot.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
11129
// MoveF2I from an XMM register to an int stack slot via MOVSS.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11142
// MoveF2I register-to-register: MOVD copies the 32-bit pattern directly
// from XMM to a GPR (cheapest form, cost 85).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11154
// Reinterpret int bits as float: plain 32-bit store into the float slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
11166
11167
// MoveI2F into an x87 register: FLD_S reads the raw bits as a float.
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}
11181
// MoveI2F into an XMM register via MOVSS from the stack slot.
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}
11194
// MoveI2F register-to-register: MOVD copies the bit pattern GPR -> XMM.
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11207
// Reinterpret double bits as long: two 32-bit loads fill the register pair.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src), ClearInstMark);
  ins_pipe( ialu_mem_long_reg );
%}
11219
// MoveD2L from an x87 register: store the 64-bit pattern to the long slot.
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
11230
// MoveD2L from an XMM register to a long stack slot via MOVSD.
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11242
// MoveD2L register-to-register: MOVD extracts the low 32 bits, then the
// upper 32 bits are swapped into lane 0 with PSHUFLW (imm 0x4E swaps the
// two low 32-bit word pairs) and extracted via a second MOVD.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11258
// Reinterpret long bits as double: two 32-bit stores fill the double slot.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}
11270
11271
// MoveL2D into an x87 register: FLD_D reads the raw 64 bits as a double.
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}
11285
11286
// MoveL2D into an XMM register via MOVSD (clears the upper half; only when
// UseXmmLoadAndClearUpper says that is the preferred form).
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}
11299
11300 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11301 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11302 match(Set dst (MoveL2D src));
11303 effect(DEF dst, USE src);
11304
11305 ins_cost(95);
11306 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11307 ins_encode %{
11308 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11309 %}
11310 ins_pipe( pipe_slow );
11311 %}
11312
// MoveL2D register-to-register: MOVD each 32-bit half into an XMM register
// and interleave them with PUNPCKLDQ to form the 64-bit pattern.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11328
11329 //----------------------------- CompressBits/ExpandBits ------------------------
11330
11331 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11332 predicate(n->bottom_type()->isa_long());
11333 match(Set dst (CompressBits src mask));
11334 effect(TEMP rtmp, TEMP xtmp, KILL cr);
11335 format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11336 ins_encode %{
11337 Label exit, partail_result;
11338 // Parallely extract both upper and lower 32 bits of source into destination register pair.
11339 // Merge the results of upper and lower destination registers such that upper destination
11340 // results are contiguously laid out after the lower destination result.
11341 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
11342 __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11343 __ popcntl($rtmp$$Register, $mask$$Register);
11344 // Skip merging if bit count of lower mask register is equal to 32 (register size).
11345 __ cmpl($rtmp$$Register, 32);
11346 __ jccb(Assembler::equal, exit);
11347 // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
11348 __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11349 // Shift left the contents of upper destination register by true bit count of lower mask register
11350 // and merge with lower destination register.
11351 __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11352 __ orl($dst$$Register, $rtmp$$Register);
11353 __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11354 // Zero out upper destination register if true bit count of lower 32 bit mask is zero
11355 // since contents of upper destination have already been copied to lower destination
11356 // register.
11357 __ cmpl($rtmp$$Register, 0);
11358 __ jccb(Assembler::greater, partail_result);
11359 __ movl(HIGH_FROM_LOW($dst$$Register), 0);
11360 __ jmp(exit);
11361 __ bind(partail_result);
11362 // Perform right shift over upper destination register to move out bits already copied
11363 // to lower destination register.
11364 __ subl($rtmp$$Register, 32);
11365 __ negl($rtmp$$Register);
11366 __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11367 __ bind(exit);
11368 %}
11369 ins_pipe( pipe_slow );
11370 %}
11371
// 64-bit ExpandBits (PDEP) composed from 32-bit PDEPs on a 32-bit target.
instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src mask));
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    // Extraction operation sequentially reads the bits from source register starting from LSB
    // and lays them out into destination register at bit locations corresponding to true bits
    // in mask register. Thus number of source bits read are equal to combined true bit count
    // of mask register pair.
    Label exit, mask_clipping;
    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
    __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
    __ popcntl($rtmp$$Register, $mask$$Register);
    // If true bit count of lower mask register is 32 then none of bit of lower source register
    // will feed to upper destination register.
    __ cmpl($rtmp$$Register, 32);
    __ jccb(Assembler::equal, exit);
    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift right the contents of lower source register to remove already consumed bits.
    __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
    // Extract the bits from lower source register starting from LSB under the influence
    // of upper mask register.
    __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    // rtmp := 32 - popcount(lower mask) = number of lower-source bits that
    // fed the upper destination; that many set bits of the upper mask have
    // already been consumed.
    __ subl($rtmp$$Register, 32);
    __ negl($rtmp$$Register);
    // Save the lower mask in XMM and work on a copy of the upper mask.
    __ movdl($xtmp$$XMMRegister, $mask$$Register);
    __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
    // Clear the set bits in upper mask register which have been used to extract the contents
    // from lower source register.
    __ bind(mask_clipping);
    __ blsrl($mask$$Register, $mask$$Register);  // clear lowest set bit
    __ decrementl($rtmp$$Register, 1);
    __ jccb(Assembler::greater, mask_clipping);
    // Starting from LSB extract the bits from upper source register under the influence of
    // remaining set bits in upper mask register.
    __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
    // Merge the partial results extracted from lower and upper source register bits.
    __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    // Restore the clobbered lower mask half from its XMM spill slot.
    __ movdl($mask$$Register, $xtmp$$XMMRegister);
    __ bind(exit);
  %}
  ins_pipe( pipe_slow );
%}
11418
11419 // =======================================================================
11420 // Fast clearing of an array
11421 // Small non-constant length ClearArray for non-AVX512 targets.
// cnt (ECX) is the element count handed straight to MacroAssembler::
// clear_mem; base (EDI) is the start address. EAX is zeroed and used as
// the store value. false => "not large" variant; knoreg => no AVX512 mask.
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11480
11481 // Small non-constant length ClearArray for AVX512 targets.
// AVX512 variant of rep_stos: identical shape, but passes a real mask
// register (ktmp) so clear_mem can use masked EVEX stores.
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11541
11542 // Large non-constant length ClearArray for non-AVX512 targets.
// "Large" ClearArray: skips the short-size check and goes straight to the
// bulk-clearing path (clear_mem called with is_large == true).
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11591
11592 // Large non-constant length ClearArray for AVX512 targets.
// AVX512 variant of rep_stos_large: same bulk path, with a real mask
// register (ktmp) for masked EVEX stores.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11641
11642 // Small constant length ClearArray for AVX512 targets.
// Constant-length ClearArray for AVX512VL: the count is an immediate, so
// clear_mem gets $cnt$$constant and can fully unroll with masked stores.
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
11655
// StrComp with LL encoding (both operands byte[]); result in EAX.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11670
// AVX512VLBW variant of string_compareL: supplies a mask register (ktmp)
// so string_compare can use masked vector loads.
instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11685
// StrComp with UU encoding (both operands char[]); result in EAX.
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11700
// AVX-512 VL+BW variant of string_compareU (UU encoding); adds a TEMP
// opmask register ktmp for the masked vector loop.
11701 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11702 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11703 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11704 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11705 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11706
11707 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11708 ins_encode %{
11709 __ string_compare($str1$$Register, $str2$$Register,
11710 $cnt1$$Register, $cnt2$$Register, $result$$Register,
11711 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11712 %}
11713 ins_pipe( pipe_slow );
11714 %}
11715
// StrComp intrinsic with mixed encodings (LU: first operand Latin-1,
// second UTF-16, per StrIntrinsicNode encoding — confirm against
// intrinsicnode.hpp). Non-AVX512 path (knoreg).
11716 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11717 eAXRegI result, regD tmp1, eFlagsReg cr) %{
11718 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11719 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11720 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11721
11722 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11723 ins_encode %{
11724 __ string_compare($str1$$Register, $str2$$Register,
11725 $cnt1$$Register, $cnt2$$Register, $result$$Register,
11726 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11727 %}
11728 ins_pipe( pipe_slow );
11729 %}
11730
// AVX-512 VL+BW variant of string_compareLU; supplies opmask ktmp.
11731 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11732 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11733 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11734 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11735 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11736
11737 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11738 ins_encode %{
11739 __ string_compare($str1$$Register, $str2$$Register,
11740 $cnt1$$Register, $cnt2$$Register, $result$$Register,
11741 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11742 %}
11743 ins_pipe( pipe_slow );
11744 %}
11745
// StrComp intrinsic with mixed encodings (UL). Note the operand register
// assignment is mirrored vs. the other variants (str1 in ESI, str2 in EDI),
// and the call below passes str2/cnt2 FIRST — presumably so the macro
// assembler can reuse the LU-style code path with swapped roles; confirm
// in MacroAssembler::string_compare.
11746 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11747 eAXRegI result, regD tmp1, eFlagsReg cr) %{
11748 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11749 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11750 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11751
11752 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11753 ins_encode %{
11754 __ string_compare($str2$$Register, $str1$$Register,
11755 $cnt2$$Register, $cnt1$$Register, $result$$Register,
11756 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11757 %}
11758 ins_pipe( pipe_slow );
11759 %}
11760
// AVX-512 VL+BW variant of string_compareUL. Same mirrored register
// assignment and swapped str2/str1 call order as the non-evex version,
// plus a TEMP opmask register ktmp.
11761 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11762 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11763 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11764 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11765 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11766
11767 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11768 ins_encode %{
11769 __ string_compare($str2$$Register, $str1$$Register,
11770 $cnt2$$Register, $cnt1$$Register, $result$$Register,
11771 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11772 %}
11773 ins_pipe( pipe_slow );
11774 %}
11775
11776 // fast string equals
// StrEquals intrinsic. Delegates to arrays_equals with first argument
// 'false' — distinguishing it from the AryEq instructs below, which pass
// 'true'. Non-AVX512 path (knoreg in the opmask slot).
11777 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11778 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11779 predicate(!VM_Version::supports_avx512vlbw());
11780 match(Set result (StrEquals (Binary str1 str2) cnt));
11781 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11782
11783 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
11784 ins_encode %{
11785 __ arrays_equals(false, $str1$$Register, $str2$$Register,
11786 $cnt$$Register, $result$$Register, $tmp3$$Register,
11787 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11788 %}
11789
11790 ins_pipe( pipe_slow );
11791 %}
11792
// AVX-512 VL+BW variant of string_equals; adds a TEMP opmask register ktmp.
11793 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11794 regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11795 predicate(VM_Version::supports_avx512vlbw());
11796 match(Set result (StrEquals (Binary str1 str2) cnt));
11797 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11798
11799 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
11800 ins_encode %{
11801 __ arrays_equals(false, $str1$$Register, $str2$$Register,
11802 $cnt$$Register, $result$$Register, $tmp3$$Register,
11803 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11804 %}
11805
11806 ins_pipe( pipe_slow );
11807 %}
11808
11809
11810 // fast search of substring with known size.
// StrIndexOf with a compile-time-constant needle length, Latin-1 (LL).
// Needles of 16+ byte elements use the string_indexofC8 fast path (no
// stack staging of the needle); shorter needles fall back to the general
// string_indexof routine. Requires SSE4.2 (predicate).
11811 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11812 eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11813 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11814 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11815 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11816
11817 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11818 ins_encode %{
11819 int icnt2 = (int)$int_cnt2$$constant;
11820 if (icnt2 >= 16) {
11821 // IndexOf for constant substrings with size >= 16 elements
11822 // which don't need to be loaded through stack.
11823 __ string_indexofC8($str1$$Register, $str2$$Register,
11824 $cnt1$$Register, $cnt2$$Register,
11825 icnt2, $result$$Register,
11826 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11827 } else {
11828 // Small strings are loaded through stack if they cross page boundary.
11829 __ string_indexof($str1$$Register, $str2$$Register,
11830 $cnt1$$Register, $cnt2$$Register,
11831 icnt2, $result$$Register,
11832 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11833 }
11834 %}
11835 ins_pipe( pipe_slow );
11836 %}
11837
11838 // fast search of substring with known size.
// StrIndexOf with a constant-length UTF-16 (UU) needle. The fast-path
// threshold is 8 elements here (vs 16 for Latin-1) — same 16-byte vector
// width, wider elements. Requires SSE4.2.
11839 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11840 eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11841 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11842 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11843 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11844
11845 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11846 ins_encode %{
11847 int icnt2 = (int)$int_cnt2$$constant;
11848 if (icnt2 >= 8) {
11849 // IndexOf for constant substrings with size >= 8 elements
11850 // which don't need to be loaded through stack.
11851 __ string_indexofC8($str1$$Register, $str2$$Register,
11852 $cnt1$$Register, $cnt2$$Register,
11853 icnt2, $result$$Register,
11854 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11855 } else {
11856 // Small strings are loaded through stack if they cross page boundary.
11857 __ string_indexof($str1$$Register, $str2$$Register,
11858 $cnt1$$Register, $cnt2$$Register,
11859 icnt2, $result$$Register,
11860 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11861 }
11862 %}
11863 ins_pipe( pipe_slow );
11864 %}
11865
11866 // fast search of substring with known size.
// StrIndexOf with a constant-length needle, mixed UL encoding.
// NOTE(review): the format text says "char[]" although this is the UL
// (mixed-encoding) variant — debug-output wording only, harmless.
11867 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11868 eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11869 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11870 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11871 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11872
11873 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11874 ins_encode %{
11875 int icnt2 = (int)$int_cnt2$$constant;
11876 if (icnt2 >= 8) {
11877 // IndexOf for constant substrings with size >= 8 elements
11878 // which don't need to be loaded through stack.
11879 __ string_indexofC8($str1$$Register, $str2$$Register,
11880 $cnt1$$Register, $cnt2$$Register,
11881 icnt2, $result$$Register,
11882 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11883 } else {
11884 // Small strings are loaded through stack if they cross page boundary.
11885 __ string_indexof($str1$$Register, $str2$$Register,
11886 $cnt1$$Register, $cnt2$$Register,
11887 icnt2, $result$$Register,
11888 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11889 }
11890 %}
11891 ins_pipe( pipe_slow );
11892 %}
11893
// StrIndexOf with a runtime (non-constant) needle length, Latin-1 (LL).
// The literal -1 in the int_cnt2 argument slot signals "length in cnt2".
11894 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11895 eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11896 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11897 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11898 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11899
11900 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
11901 ins_encode %{
11902 __ string_indexof($str1$$Register, $str2$$Register,
11903 $cnt1$$Register, $cnt2$$Register,
11904 (-1), $result$$Register,
11905 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11906 %}
11907 ins_pipe( pipe_slow );
11908 %}
11909
// StrIndexOf with a runtime needle length, UTF-16 (UU). -1 constant count
// means "use cnt2 register".
11910 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11911 eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11912 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11913 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11914 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11915
11916 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
11917 ins_encode %{
11918 __ string_indexof($str1$$Register, $str2$$Register,
11919 $cnt1$$Register, $cnt2$$Register,
11920 (-1), $result$$Register,
11921 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11922 %}
11923 ins_pipe( pipe_slow );
11924 %}
11925
// StrIndexOf with a runtime needle length, mixed UL encoding. -1 constant
// count means "use cnt2 register".
11926 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11927 eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11928 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11929 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11930 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11931
11932 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
11933 ins_encode %{
11934 __ string_indexof($str1$$Register, $str2$$Register,
11935 $cnt1$$Register, $cnt2$$Register,
11936 (-1), $result$$Register,
11937 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11938 %}
11939 ins_pipe( pipe_slow );
11940 %}
11941
// StrIndexOfChar for UTF-16 strings (U encoding): find a single char in
// str1[0..cnt1). Three XMM temps plus an integer temp; requires SSE4.2.
11942 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11943 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11944 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
11945 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11946 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11947 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
11948 ins_encode %{
11949 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11950 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11951 %}
11952 ins_pipe( pipe_slow );
11953 %}
11954
// StrIndexOfChar for Latin-1 strings (L encoding); mirrors
// string_indexof_char above but dispatches to stringL_indexof_char.
11955 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11956 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11957 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
11958 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11959 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11960 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
11961 ins_encode %{
11962 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11963 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11964 %}
11965 ins_pipe( pipe_slow );
11966 %}
11967
11968
11969 // fast array equals
// AryEq on byte arrays (LL). Calls arrays_equals with first argument
// 'true' (array form, vs 'false' for StrEquals above); unlike StrEquals,
// no length operand appears in the match rule — tmp3 occupies the count
// argument slot, presumably filled from the array headers inside the
// macro assembler (confirm in MacroAssembler::arrays_equals).
11970 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11971 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11972 %{
11973 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11974 match(Set result (AryEq ary1 ary2));
11975 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11976 //ins_cost(300);
11977
11978 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11979 ins_encode %{
11980 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11981 $tmp3$$Register, $result$$Register, $tmp4$$Register,
11982 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11983 %}
11984 ins_pipe( pipe_slow );
11985 %}
11986
// AVX-512 VL+BW variant of array_equalsB; adds a TEMP opmask register ktmp.
11987 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11988 regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11989 %{
11990 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11991 match(Set result (AryEq ary1 ary2));
11992 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11993 //ins_cost(300);
11994
11995 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11996 ins_encode %{
11997 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11998 $tmp3$$Register, $result$$Register, $tmp4$$Register,
11999 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12000 %}
12001 ins_pipe( pipe_slow );
12002 %}
12003
// AryEq on char arrays (UU); identical shape to array_equalsB but passes
// 'true /* char */' so arrays_equals compares 2-byte elements.
12004 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12005 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12006 %{
12007 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12008 match(Set result (AryEq ary1 ary2));
12009 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12010 //ins_cost(300);
12011
12012 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12013 ins_encode %{
12014 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12015 $tmp3$$Register, $result$$Register, $tmp4$$Register,
12016 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12017 %}
12018 ins_pipe( pipe_slow );
12019 %}
12020
// AVX-512 VL+BW variant of array_equalsC; adds a TEMP opmask register ktmp.
12021 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12022 regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12023 %{
12024 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12025 match(Set result (AryEq ary1 ary2));
12026 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12027 //ins_cost(300);
12028
12029 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12030 ins_encode %{
12031 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12032 $tmp3$$Register, $result$$Register, $tmp4$$Register,
12033 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12034 %}
12035 ins_pipe( pipe_slow );
12036 %}
12037
// CountPositives over byte[]. This non-masked path is selected whenever
// EITHER avx512vlbw or bmi2 is missing (predicate is the negation of the
// evex variant's conjunction); both opmask slots get knoreg.
12038 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12039 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12040 %{
12041 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12042 match(Set result (CountPositives ary1 len));
12043 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12044
12045 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
12046 ins_encode %{
12047 __ count_positives($ary1$$Register, $len$$Register,
12048 $result$$Register, $tmp3$$Register,
12049 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12050 %}
12051 ins_pipe( pipe_slow );
12052 %}
12053
// Masked CountPositives: requires BOTH avx512vlbw and bmi2; supplies two
// TEMP opmask registers (ktmp1/ktmp2) to count_positives.
12054 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12055 regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12056 %{
12057 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12058 match(Set result (CountPositives ary1 len));
12059 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12060
12061 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
12062 ins_encode %{
12063 __ count_positives($ary1$$Register, $len$$Register,
12064 $result$$Register, $tmp3$$Register,
12065 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12066 %}
12067 ins_pipe( pipe_slow );
12068 %}
12069
12070
12071 // fast char[] to byte[] compression
// StrCompressedCopy: compress char[] src into byte[] dst. Non-masked path
// (taken when avx512vlbw or bmi2 is unavailable); uses four XMM temps and
// passes knoreg for both opmask slots.
12072 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12073 regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12074 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12075 match(Set result (StrCompressedCopy src (Binary dst len)));
12076 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12077
12078 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
12079 ins_encode %{
12080 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12081 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12082 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12083 knoreg, knoreg);
12084 %}
12085 ins_pipe( pipe_slow );
12086 %}
12087
// Masked StrCompressedCopy: requires both avx512vlbw and bmi2; two TEMP
// opmask registers are threaded through to char_array_compress.
12088 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12089 regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12090 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12091 match(Set result (StrCompressedCopy src (Binary dst len)));
12092 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12093
12094 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
12095 ins_encode %{
12096 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12097 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12098 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12099 $ktmp1$$KRegister, $ktmp2$$KRegister);
12100 %}
12101 ins_pipe( pipe_slow );
12102 %}
12103
12104 // fast byte[] to char[] inflation
// StrInflatedCopy: inflate byte[] src into char[] dst. Produces no value
// (Set dummy / Universe). Non-masked path; knoreg in the opmask slot.
12105 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12106 regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12107 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12108 match(Set dummy (StrInflatedCopy src (Binary dst len)));
12109 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12110
12111 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
12112 ins_encode %{
12113 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12114 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12115 %}
12116 ins_pipe( pipe_slow );
12117 %}
12118
// Masked StrInflatedCopy (avx512vlbw + bmi2); TEMP opmask ktmp is passed
// to byte_array_inflate.
12119 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12120 regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12121 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12122 match(Set dummy (StrInflatedCopy src (Binary dst len)));
12123 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12124
12125 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
12126 ins_encode %{
12127 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12128 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12129 %}
12130 ins_pipe( pipe_slow );
12131 %}
12132
12133 // encode char[] to byte[] in ISO_8859_1
// EncodeISOArray, ISO-8859-1 mode: predicate selects non-ascii nodes and
// the trailing 'false' to encode_iso_array matches (the sibling instruct
// below passes 'true' for the ascii case).
12134 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12135 regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12136 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12137 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12138 match(Set result (EncodeISOArray src (Binary dst len)));
12139 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12140
12141 format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12142 ins_encode %{
12143 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12144 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12145 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12146 %}
12147 ins_pipe( pipe_slow );
12148 %}
12149
12150 // encode char[] to byte[] in ASCII
// EncodeISOArray, ASCII mode: same shape as encode_iso_array above but
// predicate requires is_ascii() and the trailing argument is 'true'.
12151 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12152 regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12153 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12154 predicate(((EncodeISOArrayNode*)n)->is_ascii());
12155 match(Set result (EncodeISOArray src (Binary dst len)));
12156 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12157
12158 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12159 ins_encode %{
12160 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12161 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12162 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12163 %}
12164 ins_pipe( pipe_slow );
12165 %}
12166
12167 //----------Control Flow Instructions------------------------------------------
12168 // Signed compare Instructions
// Signed 32-bit register-register compare; emits CMP r32, r/m32 (0x3B /r)
// and defines the signed condition flags.
12169 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12170 match(Set cr (CmpI op1 op2));
12171 effect( DEF cr, USE op1, USE op2 );
12172 format %{ "CMP $op1,$op2" %}
12173 opcode(0x3B); /* Opcode 3B /r */
12174 ins_encode( OpcP, RegReg( op1, op2) );
12175 ins_pipe( ialu_cr_reg_reg );
12176 %}
12177
// Signed compare of a register against an immediate (CMP r/m32, imm; 81 /7).
// OpcSErm/Con8or32 presumably select the sign-extended imm8 form when the
// constant fits in a byte — confirm against the encoding macros.
12178 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12179 match(Set cr (CmpI op1 op2));
12180 effect( DEF cr, USE op1 );
12181 format %{ "CMP $op1,$op2" %}
12182 opcode(0x81,0x07); /* Opcode 81 /7 */
12183 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
12184 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12185 ins_pipe( ialu_cr_reg_imm );
12186 %}
12187
12188 // Cisc-spilled version of cmpI_eReg
// Cisc-spilled signed compare: register vs. memory operand (folds the
// LoadI). Higher ins_cost (500) steers the matcher toward the reg-reg form.
12189 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12190 match(Set cr (CmpI op1 (LoadI op2)));
12191
12192 format %{ "CMP $op1,$op2" %}
12193 ins_cost(500);
12194 opcode(0x3B); /* Opcode 3B /r */
12195 ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12196 ins_pipe( ialu_cr_reg_mem );
12197 %}
12198
// Compare against zero: TEST src,src (0x85) sets the same ZF/SF as
// CMP src,0 without needing an immediate.
12199 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12200 match(Set cr (CmpI src zero));
12201 effect( DEF cr, USE src );
12202
12203 format %{ "TEST $src,$src" %}
12204 opcode(0x85);
12205 ins_encode( OpcP, RegReg( src, src ) );
12206 ins_pipe( ialu_cr_reg_imm );
12207 %}
12208
// Fused (src & con) == 0 test: TEST r/m32, imm32 (F7 /0) — avoids
// materializing the AndI result in a register.
12209 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12210 match(Set cr (CmpI (AndI src con) zero));
12211
12212 format %{ "TEST $src,$con" %}
12213 opcode(0xF7,0x00);
12214 ins_encode( OpcP, RegOpc(src), Con32(con) );
12215 ins_pipe( ialu_cr_reg_imm );
12216 %}
12217
// Fused (src & mem) == 0 test: TEST r32, r/m32 (0x85) with the AndI's
// memory operand folded into the instruction.
12218 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12219 match(Set cr (CmpI (AndI src mem) zero));
12220
12221 format %{ "TEST $src,$mem" %}
12222 opcode(0x85);
12223 ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
12224 ins_pipe( ialu_cr_reg_mem );
12225 %}
12226
12227 // Unsigned compare Instructions; really, same as signed except they
12228 // produce an eFlagsRegU instead of eFlagsReg.
// Unsigned compare: same CMP encoding as compI_eReg, but defines
// eFlagsRegU so branches use the unsigned condition codes.
12229 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12230 match(Set cr (CmpU op1 op2));
12231
12232 format %{ "CMPu $op1,$op2" %}
12233 opcode(0x3B); /* Opcode 3B /r */
12234 ins_encode( OpcP, RegReg( op1, op2) );
12235 ins_pipe( ialu_cr_reg_reg );
12236 %}
12237
// Unsigned compare against an immediate (CMP r/m32, imm; 81 /7), producing
// unsigned flags.
12238 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12239 match(Set cr (CmpU op1 op2));
12240
12241 format %{ "CMPu $op1,$op2" %}
12242 opcode(0x81,0x07); /* Opcode 81 /7 */
12243 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12244 ins_pipe( ialu_cr_reg_imm );
12245 %}
12246
12247 // // Cisc-spilled version of cmpU_eReg
// Cisc-spilled unsigned compare with a folded memory operand; cost 500
// keeps the reg-reg form preferred.
12248 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12249 match(Set cr (CmpU op1 (LoadI op2)));
12250
12251 format %{ "CMPu $op1,$op2" %}
12252 ins_cost(500);
12253 opcode(0x3B); /* Opcode 3B /r */
12254 ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12255 ins_pipe( ialu_cr_reg_mem );
12256 %}
12257
12258 // // Cisc-spilled version of cmpU_eReg
12259 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12260 // match(Set cr (CmpU (LoadI op1) op2));
12261 //
12262 // format %{ "CMPu $op1,$op2" %}
12263 // ins_cost(500);
12264 // opcode(0x39); /* Opcode 39 /r */
12265 // ins_encode( OpcP, RegMem( op1, op2) );
12266 //%}
12267
// Unsigned compare against zero via TEST src,src; equality/zero tests are
// the same for signed and unsigned.
12268 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12269 match(Set cr (CmpU src zero));
12270
12271 format %{ "TESTu $src,$src" %}
12272 opcode(0x85);
12273 ins_encode( OpcP, RegReg( src, src ) );
12274 ins_pipe( ialu_cr_reg_imm );
12275 %}
12276
12277 // Unsigned pointer compare Instructions
// Pointer compare — pointers compare unsigned, hence eFlagsRegU; same
// CMP r32, r/m32 (0x3B /r) encoding as the integer forms.
12278 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12279 match(Set cr (CmpP op1 op2));
12280
12281 format %{ "CMPu $op1,$op2" %}
12282 opcode(0x3B); /* Opcode 3B /r */
12283 ins_encode( OpcP, RegReg( op1, op2) );
12284 ins_pipe( ialu_cr_reg_reg );
12285 %}
12286
// Pointer compare against an immediate. Unlike compI_eReg_imm, the
// encoding is bracketed by SetInstMark/ClearInstMark — presumably because
// a pointer immediate may carry relocation info; confirm in the encoding
// block definitions.
12287 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12288 match(Set cr (CmpP op1 op2));
12289
12290 format %{ "CMPu $op1,$op2" %}
12291 opcode(0x81,0x07); /* Opcode 81 /7 */
12292 ins_encode( SetInstMark, OpcSErm( op1, op2 ), Con8or32( op2 ), ClearInstMark );
12293 ins_pipe( ialu_cr_reg_imm );
12294 %}
12295
12296 // // Cisc-spilled version of cmpP_eReg
// Cisc-spilled pointer compare with a folded LoadP memory operand;
// cost 500 steers the matcher toward the reg-reg form.
12297 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12298 match(Set cr (CmpP op1 (LoadP op2)));
12299
12300 format %{ "CMPu $op1,$op2" %}
12301 ins_cost(500);
12302 opcode(0x3B); /* Opcode 3B /r */
12303 ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12304 ins_pipe( ialu_cr_reg_mem );
12305 %}
12306
12307 // // Cisc-spilled version of cmpP_eReg
12308 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12309 // match(Set cr (CmpP (LoadP op1) op2));
12310 //
12311 // format %{ "CMPu $op1,$op2" %}
12312 // ins_cost(500);
12313 // opcode(0x39); /* Opcode 39 /r */
12314 // ins_encode( OpcP, RegMem( op1, op2) );
12315 //%}
12316
12317 // Compare raw pointer (used in out-of-heap check).
12318 // Only works because non-oop pointers must be raw pointers
12319 // and raw pointers have no anti-dependencies.
// Raw-pointer-only variant of compP_eReg_mem: the predicate restricts it
// to loads with no relocation (see the file comment above — raw pointers
// have no anti-dependencies). No ins_cost penalty, unlike the general form.
12320 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12321 predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12322 match(Set cr (CmpP op1 (LoadP op2)));
12323
12324 format %{ "CMPu $op1,$op2" %}
12325 opcode(0x3B); /* Opcode 3B /r */
12326 ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12327 ins_pipe( ialu_cr_reg_mem );
12328 %}
12329
12330 //
12331 // This will generate a signed flags result. This should be ok
12332 // since any compare to a zero should be eq/neq.
// Pointer null-compare via TEST src,src. Produces signed flags (eFlagsReg)
// — acceptable because, per the comment above, compares against zero are
// only ever eq/neq.
12333 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12334 match(Set cr (CmpP src zero));
12335
12336 format %{ "TEST $src,$src" %}
12337 opcode(0x85);
12338 ins_encode( OpcP, RegReg( src, src ) );
12339 ins_pipe( ialu_cr_reg_imm );
12340 %}
12341
12342 // Cisc-spilled version of testP_reg
12343 // This will generate a signed flags result. This should be ok
12344 // since any compare to a zero should be eq/neq.
// Cisc-spilled pointer null-compare directly against memory:
// TEST r/m32, 0xFFFFFFFF (F7 /0) sets ZF iff the loaded pointer is null,
// without occupying a register. Cost 500 as with the other memory forms.
12345 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12346 match(Set cr (CmpP (LoadP op) zero));
12347
12348 format %{ "TEST $op,0xFFFFFFFF" %}
12349 ins_cost(500);
12350 opcode(0xF7); /* Opcode F7 /0 */
12351 ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF), ClearInstMark );
12352 ins_pipe( ialu_cr_reg_imm );
12353 %}
12354
12355 // Yanked all unsigned pointer compare operations.
12356 // Pointer compares are done with CmpP which is already unsigned.
12357
12358 //----------Max and Min--------------------------------------------------------
12359 // Min Instructions
12360 ////
12361 // *** Min and Max using the conditional move are slower than the
12362 // *** branch version on a Pentium III.
12363 // // Conditional move for min
12364 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12365 // effect( USE_DEF op2, USE op1, USE cr );
12366 // format %{ "CMOVlt $op2,$op1\t! min" %}
12367 // opcode(0x4C,0x0F);
12368 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12369 // ins_pipe( pipe_cmov_reg );
12370 //%}
12371 //
12372 //// Min Register with Register (P6 version)
12373 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12374 // predicate(VM_Version::supports_cmov() );
12375 // match(Set op2 (MinI op1 op2));
12376 // ins_cost(200);
12377 // expand %{
12378 // eFlagsReg cr;
12379 // compI_eReg(cr,op1,op2);
12380 // cmovI_reg_lt(op2,op1,cr);
12381 // %}
12382 //%}
12383
12384 // Min Register with Register (generic version)
// Generic MinI: dst = min(dst, src), clobbering flags. All code comes from
// the min_enc encoding class; the opcode(0xCC) looks like an unused
// placeholder (0xCC is INT3) — confirm min_enc never references $primary.
12385 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12386 match(Set dst (MinI dst src));
12387 effect(KILL flags);
12388 ins_cost(300);
12389
12390 format %{ "MIN $dst,$src" %}
12391 opcode(0xCC);
12392 ins_encode( min_enc(dst,src) );
12393 ins_pipe( pipe_slow );
12394 %}
12395
12396 // Max Register with Register
12397 // *** Min and Max using the conditional move are slower than the
12398 // *** branch version on a Pentium III.
12399 // // Conditional move for max
12400 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12401 // effect( USE_DEF op2, USE op1, USE cr );
12402 // format %{ "CMOVgt $op2,$op1\t! max" %}
12403 // opcode(0x4F,0x0F);
12404 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12405 // ins_pipe( pipe_cmov_reg );
12406 //%}
12407 //
12408 // // Max Register with Register (P6 version)
12409 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12410 // predicate(VM_Version::supports_cmov() );
12411 // match(Set op2 (MaxI op1 op2));
12412 // ins_cost(200);
12413 // expand %{
12414 // eFlagsReg cr;
12415 // compI_eReg(cr,op1,op2);
12416 // cmovI_reg_gt(op2,op1,cr);
12417 // %}
12418 //%}
12419
// Max Register with Register (generic version)
// Integer maximum: dst = max(dst, src).  Mirror image of minI_eReg above;
// the compare inside max_enc clobbers the condition codes (KILL flags).
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
match(Set dst (MaxI dst src));
effect(KILL flags);
ins_cost(300);

format %{ "MAX $dst,$src" %}
// 0xCC is a dummy opcode here; the actual instruction bytes are produced
// entirely by the max_enc encoding class.
opcode(0xCC);
ins_encode( max_enc(dst,src) );
ins_pipe( pipe_slow );
%}
12431
12432 // ============================================================================
12433 // Counted Loop limit node which represents exact final iterator value.
12434 // Note: the resulting value should fit into integer range since
12435 // counted loops have limit check on overflow.
12436 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12437 match(Set limit (LoopLimit (Binary init limit) stride));
12438 effect(TEMP limit_hi, TEMP tmp, KILL flags);
12439 ins_cost(300);
12440
12441 format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12442 ins_encode %{
12443 int strd = (int)$stride$$constant;
12444 assert(strd != 1 && strd != -1, "sanity");
12445 int m1 = (strd > 0) ? 1 : -1;
12446 // Convert limit to long (EAX:EDX)
12447 __ cdql();
12448 // Convert init to long (init:tmp)
12449 __ movl($tmp$$Register, $init$$Register);
12450 __ sarl($tmp$$Register, 31);
12451 // $limit - $init
12452 __ subl($limit$$Register, $init$$Register);
12453 __ sbbl($limit_hi$$Register, $tmp$$Register);
12454 // + ($stride - 1)
12455 if (strd > 0) {
12456 __ addl($limit$$Register, (strd - 1));
12457 __ adcl($limit_hi$$Register, 0);
12458 __ movl($tmp$$Register, strd);
12459 } else {
12460 __ addl($limit$$Register, (strd + 1));
12461 __ adcl($limit_hi$$Register, -1);
12462 __ lneg($limit_hi$$Register, $limit$$Register);
12463 __ movl($tmp$$Register, -strd);
12464 }
12465 // signed division: (EAX:EDX) / pos_stride
12466 __ idivl($tmp$$Register);
12467 if (strd < 0) {
12468 // restore sign
12469 __ negl($tmp$$Register);
12470 }
12471 // (EAX) * stride
12472 __ mull($tmp$$Register);
12473 // + init (ignore upper bits)
12474 __ addl($limit$$Register, $init$$Register);
12475 %}
12476 ins_pipe( pipe_slow );
12477 %}
12478
12479 // ============================================================================
12480 // Branch Instructions
12481 // Jump Table
// Jump through a jump table located in the constant section:
// target address is loaded from [table_base + switch_val] and jumped to.
instruct jumpXtnd(rRegI switch_val) %{
match(Jump switch_val);
ins_cost(350);
format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
ins_encode %{
// Jump to Address(table_base + switch_reg)
Address index(noreg, $switch_val$$Register, Address::times_1);
__ jump(ArrayAddress($constantaddress, index), noreg);
%}
ins_pipe(pipe_jmp);
%}
12493
// Jump Direct - Label defines a relative address from JMP+1
// Unconditional goto; fixed 5-byte long-form JMP rel32.
instruct jmpDir(label labl) %{
match(Goto);
effect(USE labl);

ins_cost(300);
format %{ "JMP $labl" %}
size(5);
ins_encode %{
Label* L = $labl$$label;
__ jmp(*L, false); // Always long jump
%}
ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional branch on ordinary (signed) flags; 6-byte Jcc rel32.
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
match(If cop cr);
effect(USE labl);

ins_cost(300);
format %{ "J$cop $labl" %}
size(6);
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
%}
ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Same encoding as jmpCon, but matches the CountedLoopEnd back-branch.
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
match(CountedLoopEnd cop cr);
effect(USE labl);

ins_cost(300);
format %{ "J$cop $labl\t# Loop end" %}
size(6);
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
%}
ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
match(If cop cmp);
effect(USE labl);

ins_cost(300);
format %{ "J$cop,u $labl" %}
size(6);
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
%}
ins_pipe(pipe_jcc);
%}

// Unsigned conditional branch on flags that may have CF unset by an
// unordered float compare (eFlagsRegUCF); single Jcc suffices here.
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
match(If cop cmp);
effect(USE labl);

ins_cost(200);
format %{ "J$cop,u $labl" %}
size(6);
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
%}
ins_pipe(pipe_jcc);
%}

// eq/ne branch after an unordered-capable float compare: the parity flag
// (set on unordered) must be folded in.  ne: unordered counts as not-equal,
// so branch on P first.  eq: unordered must NOT take the branch, so skip
// over the JE when P is set.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
match(If cop cmp);
effect(USE labl);

ins_cost(200);
format %{ $$template
if ($cop$$cmpcode == Assembler::notEqual) {
$$emit$$"JP,u $labl\n\t"
$$emit$$"J$cop,u $labl"
} else {
$$emit$$"JP,u done\n\t"
$$emit$$"J$cop,u $labl\n\t"
$$emit$$"done:"
}
%}
ins_encode %{
Label* l = $labl$$label;
if ($cop$$cmpcode == Assembler::notEqual) {
__ jcc(Assembler::parity, *l, false);
__ jcc(Assembler::notEqual, *l, false);
} else if ($cop$$cmpcode == Assembler::equal) {
Label done;
__ jccb(Assembler::parity, done);
__ jcc(Assembler::equal, *l, false);
__ bind(done);
} else {
// Only eq/ne are legal for the cmpOpUCF2 operand.
ShouldNotReachHere();
}
%}
ins_pipe(pipe_jcc);
%}
12599
12600 // ============================================================================
12601 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
12602 // array for an instance of the superklass. Set a hidden internal cache on a
12603 // hit (cache is checked with exposed code in gen_subtype_check()). Return
12604 // NZ for a miss or zero for a hit. The encoding ALSO sets flags.
// Secondary-supers scan; result (EDI) is zero on a hit, non-zero on a miss.
// Fixed registers are dictated by the REPNE SCASD idiom used in the
// enc_PartialSubtypeCheck encoding class (EDI = scan pointer, ECX = count,
// EAX = value sought).
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
match(Set result (PartialSubtypeCheck sub super));
effect( KILL rcx, KILL cr );

ins_cost(1100); // slightly larger than the next version
format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
"MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
"ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
"REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
"JNE,s miss\t\t# Missed: EDI not-zero\n\t"
"MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
"XOR $result,$result\t\t Hit: EDI zero\n\t"
"miss:\t" %}

opcode(0x1); // Force a XOR of EDI
ins_encode( enc_PartialSubtypeCheck() );
ins_pipe( pipe_slow );
%}

// Variant matched when only the flags of the subtype check are consumed
// (compared against null): the XOR of EDI is skipped (opcode 0x0), so EDI
// is merely clobbered rather than being a meaningful result.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
effect( KILL rcx, KILL result );

ins_cost(1000);
format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
"MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
"ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
"REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
"JNE,s miss\t\t# Missed: flags NZ\n\t"
"MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
"miss:\t" %}

opcode(0x0); // No need to XOR EDI
ins_encode( enc_PartialSubtypeCheck() );
ins_pipe( pipe_slow );
%}
12641
12642 // ============================================================================
12643 // Branch Instructions -- short offset versions
12644 //
12645 // These instructions are used to replace jumps of a long offset (the default
12646 // match) with jumps of a shorter offset. These instructions are all tagged
12647 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12648 // match rules in general matching. Instead, the ADLC generates a conversion
12649 // method in the MachNode which can be used to do in-place replacement of the
12650 // long variant with the shorter variant. The compiler will determine if a
12651 // branch can be taken by the is_short_branch_offset() predicate in the machine
12652 // specific code section of the file.
12653
// Jump Direct - Label defines a relative address from JMP+1
// Short (2-byte, rel8) form of jmpDir; selected by the branch-shortening
// pass via ins_short_branch(1).
instruct jmpDir_short(label labl) %{
match(Goto);
effect(USE labl);

ins_cost(300);
format %{ "JMP,s $labl" %}
size(2);
ins_encode %{
Label* L = $labl$$label;
__ jmpb(*L);
%}
ins_pipe( pipe_jmp );
ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short (2-byte) form of jmpCon.
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
match(If cop cr);
effect(USE labl);

ins_cost(300);
format %{ "J$cop,s $labl" %}
size(2);
ins_encode %{
Label* L = $labl$$label;
__ jccb((Assembler::Condition)($cop$$cmpcode), *L);
%}
ins_pipe( pipe_jcc );
ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short (2-byte) form of jmpLoopEnd.
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
match(CountedLoopEnd cop cr);
effect(USE labl);

ins_cost(300);
format %{ "J$cop,s $labl\t# Loop end" %}
size(2);
ins_encode %{
Label* L = $labl$$label;
__ jccb((Assembler::Condition)($cop$$cmpcode), *L);
%}
ins_pipe( pipe_jcc );
ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
// Short (2-byte) form of jmpConU.
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
match(If cop cmp);
effect(USE labl);

ins_cost(300);
format %{ "J$cop,us $labl" %}
size(2);
ins_encode %{
Label* L = $labl$$label;
__ jccb((Assembler::Condition)($cop$$cmpcode), *L);
%}
ins_pipe( pipe_jcc );
ins_short_branch(1);
%}

// Short (2-byte) form of jmpConUCF.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
match(If cop cmp);
effect(USE labl);

ins_cost(300);
format %{ "J$cop,us $labl" %}
size(2);
ins_encode %{
Label* L = $labl$$label;
__ jccb((Assembler::Condition)($cop$$cmpcode), *L);
%}
ins_pipe( pipe_jcc );
ins_short_branch(1);
%}

// Short form of jmpConUCF2: two 2-byte branches (size 4), folding the
// parity flag as in the long version.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
match(If cop cmp);
effect(USE labl);

ins_cost(300);
format %{ $$template
if ($cop$$cmpcode == Assembler::notEqual) {
$$emit$$"JP,u,s $labl\n\t"
$$emit$$"J$cop,u,s $labl"
} else {
$$emit$$"JP,u,s done\n\t"
$$emit$$"J$cop,u,s $labl\n\t"
$$emit$$"done:"
}
%}
size(4);
ins_encode %{
Label* l = $labl$$label;
if ($cop$$cmpcode == Assembler::notEqual) {
__ jccb(Assembler::parity, *l);
__ jccb(Assembler::notEqual, *l);
} else if ($cop$$cmpcode == Assembler::equal) {
Label done;
__ jccb(Assembler::parity, done);
__ jccb(Assembler::equal, *l);
__ bind(done);
} else {
// Only eq/ne are legal for the cmpOpUCF2 operand.
ShouldNotReachHere();
}
%}
ins_pipe(pipe_jcc);
ins_short_branch(1);
%}
12766
12767 // ============================================================================
12768 // Long Compare
12769 //
12770 // Currently we hold longs in 2 registers. Comparing such values efficiently
12771 // is tricky. The flavor of compare used depends on whether we are testing
12772 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
12773 // The GE test is the negated LT test. The LE test can be had by commuting
12774 // the operands (yielding a GE test) and then negating; negate again for the
12775 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the
12776 // NE test is negated from that.
12777
12778 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12779 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
12780 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections
12781 // are collapsed internally in the ADLC's dfa-gen code. The match for
12782 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12783 // foo match ends up with the wrong leaf. One fix is to not match both
12784 // reg-reg and reg-zero forms of long-compare. This is unfortunate because
12785 // both forms beat the trinary form of long-compare and both are very useful
12786 // on Intel which has so few registers.
12787
// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Produces -1/0/+1 in dst: compares the high halves first (signed), then the
// low halves (unsigned, since they are the low 32 bits of a 64-bit value).
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
match(Set dst (CmpL3 src1 src2));
effect( KILL flags );
ins_cost(1000);
format %{ "XOR $dst,$dst\n\t"
"CMP $src1.hi,$src2.hi\n\t"
"JLT,s m_one\n\t"
"JGT,s p_one\n\t"
"CMP $src1.lo,$src2.lo\n\t"
"JB,s m_one\n\t"
"JEQ,s done\n"
"p_one:\tINC $dst\n\t"
"JMP,s done\n"
"m_one:\tDEC $dst\n"
"done:" %}
ins_encode %{
Label p_one, m_one, done;
// dst starts at 0 (the "equal" answer).
__ xorptr($dst$$Register, $dst$$Register);
// Signed compare of the high words decides unless they are equal.
__ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
__ jccb(Assembler::less, m_one);
__ jccb(Assembler::greater, p_one);
// High words equal: unsigned compare of the low words.
__ cmpl($src1$$Register, $src2$$Register);
__ jccb(Assembler::below, m_one);
__ jccb(Assembler::equal, done);
__ bind(p_one);
__ incrementl($dst$$Register);
__ jmpb(done);
__ bind(m_one);
__ decrementl($dst$$Register);
__ bind(done);
%}
ins_pipe( pipe_slow );
%}
12823
12824 //======
12825 // Manifest a CmpL result in the normal flags. Only good for LT or GE
12826 // compares. Can be used for LE or GT compares by reversing arguments.
12827 // NOT GOOD FOR EQ/NE tests.
// CmpL vs zero for lt/ge: the sign of a long is just the sign of its high
// word, so a TEST of the high half against itself sets SF correctly.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
match( Set flags (CmpL src zero ));
ins_cost(100);
format %{ "TEST $src.hi,$src.hi" %}
// 0x85 = TEST r/m32, r32
opcode(0x85);
ins_encode( OpcP, RegReg_Hi2( src, src ) );
ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// CMP low halves, then SBB through a scratch copy of the high half of src1:
// leaves the flags of the full 64-bit subtraction (for lt/ge only).
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
match( Set flags (CmpL src1 src2 ));
effect( TEMP tmp );
ins_cost(300);
format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
"MOV $tmp,$src1.hi\n\t"
"SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
ins_encode( long_cmp_flags2( src1, src2, tmp ) );
ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
// The predicate restricts matching to lt/ge Bool tests, for which the
// flagsReg_long_LTGE encoding above is valid.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
match(If cmp flags);
effect(USE labl);
predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
expand %{
jmpCon(cmp,flags,labl); // JLT or JGE...
%}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Unsigned-long twin of cmpL_zero_flags_LTGE.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
match(Set flags (CmpUL src zero));
ins_cost(100);
format %{ "TEST $src.hi,$src.hi" %}
// 0x85 = TEST r/m32, r32
opcode(0x85);
ins_encode(OpcP, RegReg_Hi2(src, src));
ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Unsigned-long twin of cmpL_reg_flags_LTGE (same CMP/SBB sequence).
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
match(Set flags (CmpUL src1 src2));
effect(TEMP tmp);
ins_cost(300);
format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
"MOV $tmp,$src1.hi\n\t"
"SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
ins_encode(long_cmp_flags2(src1, src2, tmp));
ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
match(If cmp flags);
effect(USE labl);
predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
expand %{
jmpCon(cmp, flags, labl); // JLT or JGE...
%}
%}
12899
// Compare 2 longs and CMOVE longs.
// Long-to-long conditional move on lt/ge long-compare flags: two CMOVs,
// one per 32-bit half.  Requires CMOV hardware support.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
ins_cost(400);
format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
"CMOV$cmp $dst.hi,$src.hi" %}
// 0x0F,0x40 = CMOVcc base opcode; condition folded in by enc_cmov.
opcode(0x0F,0x40);
ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant of cmovLL_reg_LTGE (src loaded as a long).
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
ins_cost(500);
format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
"CMOV$cmp $dst.hi,$src.hi" %}
opcode(0x0F,0x40);
ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags wrapper: delegates to the signed-flags rule above.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
ins_cost(400);
expand %{
cmovLL_reg_LTGE(cmp, flags, dst, src);
%}
%}

// Unsigned-flags wrapper for the memory-source variant.
instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
ins_cost(500);
expand %{
cmovLL_mem_LTGE(cmp, flags, dst, src);
%}
%}
12940
// Compare 2 longs and CMOVE ints.
// Int conditional move keyed off lt/ge long-compare flags.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
ins_cost(200);
format %{ "CMOV$cmp $dst,$src" %}
// 0x0F,0x40 = CMOVcc base opcode; condition folded in by enc_cmov.
opcode(0x0F,0x40);
ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
ins_pipe( pipe_cmov_reg );
%}

// Memory-source variant of cmovII_reg_LTGE.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
ins_cost(250);
format %{ "CMOV$cmp $dst,$src" %}
opcode(0x0F,0x40);
ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags wrapper: delegates to the signed-flags rule above.
instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
cmovII_reg_LTGE(cmp, flags, dst, src);
%}
%}

// Unsigned-flags wrapper for the memory-source variant.
instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
ins_cost(250);
expand %{
cmovII_mem_LTGE(cmp, flags, dst, src);
%}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
ins_cost(200);
format %{ "CMOV$cmp $dst,$src" %}
opcode(0x0F,0x40);
ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
// Unsigned-flags wrapper: delegates to cmovPP_reg_LTGE.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
cmovPP_reg_LTGE(cmp,flags,dst,src);
%}
%}
13000
// Compare 2 longs and CMOVE doubles
// x87-register double (regDPR) variant, used when SSE2 is unavailable;
// expands to the FCMOV-based rule.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
fcmovDPR_regS(cmp,flags,dst,src);
%}
%}

// Compare 2 longs and CMOVE doubles
// XMM double (regD) variant for SSE2+.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
fcmovD_regS(cmp,flags,dst,src);
%}
%}

// Float conditional move on lt/ge long-compare flags, x87 (no SSE) variant.
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
fcmovFPR_regS(cmp,flags,dst,src);
%}
%}

// Float conditional move on lt/ge long-compare flags, SSE variant.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
fcmovF_regS(cmp,flags,dst,src);
%}
%}
13038
13039 //======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// A long is zero iff (lo | hi) == 0, so OR the halves into a scratch reg.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
match( Set flags (CmpL src zero ));
effect(TEMP tmp);
ins_cost(200);
format %{ "MOV $tmp,$src.lo\n\t"
"OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
ins_encode( long_cmp_flags0( src, tmp ) );
ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Compare low halves; only if equal (ZF set) also compare the high halves.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
match( Set flags (CmpL src1 src2 ));
ins_cost(200+300);
format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
"JNE,s skip\n\t"
"CMP $src1.hi,$src2.hi\n\t"
"skip:\t" %}
ins_encode( long_cmp_flags1( src1, src2 ) );
ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
// The predicate restricts matching to eq/ne Bool tests, for which the
// flagsReg_long_EQNE encodings above are valid.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
match(If cmp flags);
effect(USE labl);
predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
expand %{
jmpCon(cmp,flags,labl); // JEQ or JNE...
%}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Unsigned-long twin of cmpL_zero_flags_EQNE (eq/ne ignores signedness).
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
match(Set flags (CmpUL src zero));
effect(TEMP tmp);
ins_cost(200);
format %{ "MOV $tmp,$src.lo\n\t"
"OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
ins_encode(long_cmp_flags0(src, tmp));
ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Unsigned-long twin of cmpL_reg_flags_EQNE.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
match(Set flags (CmpUL src1 src2));
ins_cost(200+300);
format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
"JNE,s skip\n\t"
"CMP $src1.hi,$src2.hi\n\t"
"skip:\t" %}
ins_encode(long_cmp_flags1(src1, src2));
ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
match(If cmp flags);
effect(USE labl);
predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
expand %{
jmpCon(cmp, flags, labl); // JEQ or JNE...
%}
%}
13108
// Compare 2 longs and CMOVE longs.
// eq/ne counterpart of cmovLL_reg_LTGE: two CMOVs, one per 32-bit half.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
ins_cost(400);
format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
"CMOV$cmp $dst.hi,$src.hi" %}
// 0x0F,0x40 = CMOVcc base opcode; condition folded in by enc_cmov.
opcode(0x0F,0x40);
ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant of cmovLL_reg_EQNE.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
ins_cost(500);
format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
"CMOV$cmp $dst.hi,$src.hi" %}
opcode(0x0F,0x40);
ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
ins_cost(200);
format %{ "CMOV$cmp $dst,$src" %}
opcode(0x0F,0x40);
ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
ins_pipe( pipe_cmov_reg );
%}

// Memory-source variant of cmovII_reg_EQNE.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
ins_cost(250);
format %{ "CMOV$cmp $dst,$src" %}
opcode(0x0F,0x40);
ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags wrapper: delegates to the signed-flags rule above.
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
cmovII_reg_EQNE(cmp, flags, dst, src);
%}
%}

// Unsigned-flags wrapper for the memory-source variant.
instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
ins_cost(250);
expand %{
cmovII_mem_EQNE(cmp, flags, dst, src);
%}
%}
13170
// Compare 2 longs and CMOVE ptrs.
// eq/ne counterpart of cmovPP_reg_LTGE.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
ins_cost(200);
format %{ "CMOV$cmp $dst,$src" %}
// 0x0F,0x40 = CMOVcc base opcode; condition folded in by enc_cmov.
opcode(0x0F,0x40);
ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
// Unsigned-flags wrapper: delegates to cmovPP_reg_EQNE.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
cmovPP_reg_EQNE(cmp,flags,dst,src);
%}
%}

// Compare 2 longs and CMOVE doubles
// x87-register double (regDPR) variant, used when SSE2 is unavailable.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
fcmovDPR_regS(cmp,flags,dst,src);
%}
%}

// Compare 2 longs and CMOVE doubles
// XMM double (regD) variant for SSE2+.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
fcmovD_regS(cmp,flags,dst,src);
%}
%}
13211
13212 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13213 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13214 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13215 ins_cost(200);
13216 expand %{
13217 fcmovFPR_regS(cmp,flags,dst,src);
13218 %}
13219 %}
13220
13221 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13222 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13223 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13224 ins_cost(200);
13225 expand %{
13226 fcmovF_regS(cmp,flags,dst,src);
13227 %}
13228 %}
13229
13230 //======
13231 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13232 // Same as cmpL_reg_flags_LEGT except must negate src
13233 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13234 match( Set flags (CmpL src zero ));
13235 effect( TEMP tmp );
13236 ins_cost(300);
13237 format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13238 "CMP $tmp,$src.lo\n\t"
13239 "SBB $tmp,$src.hi\n\t" %}
13240 ins_encode( long_cmp_flags3(src, tmp) );
13241 ins_pipe( ialu_reg_reg_long );
13242 %}
13243
13244 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13245 // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
13246 // requires a commuted test to get the same result.
13247 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13248 match( Set flags (CmpL src1 src2 ));
13249 effect( TEMP tmp );
13250 ins_cost(300);
13251 format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13252 "MOV $tmp,$src2.hi\n\t"
13253 "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
13254 ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13255 ins_pipe( ialu_cr_reg_reg );
13256 %}
13257
13258 // Long compares reg < zero/req OR reg >= zero/req.
13259 // Just a wrapper for a normal branch, plus the predicate test
13260 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13261 match(If cmp flags);
13262 effect(USE labl);
13263 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13264 ins_cost(300);
13265 expand %{
13266 jmpCon(cmp,flags,labl); // JGT or JLE...
13267 %}
13268 %}
13269
13270 //======
13271 // Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
13272 // Same as cmpUL_reg_flags_LEGT except must negate src
13273 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13274 match(Set flags (CmpUL src zero));
13275 effect(TEMP tmp);
13276 ins_cost(300);
13277 format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13278 "CMP $tmp,$src.lo\n\t"
13279 "SBB $tmp,$src.hi\n\t" %}
13280 ins_encode(long_cmp_flags3(src, tmp));
13281 ins_pipe(ialu_reg_reg_long);
13282 %}
13283
13284 // Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
13285 // Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands
13286 // requires a commuted test to get the same result.
13287 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13288 match(Set flags (CmpUL src1 src2));
13289 effect(TEMP tmp);
13290 ins_cost(300);
13291 format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13292 "MOV $tmp,$src2.hi\n\t"
13293 "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13294 ins_encode(long_cmp_flags2( src2, src1, tmp));
13295 ins_pipe(ialu_cr_reg_reg);
13296 %}
13297
13298 // Unsigned long compares reg < zero/req OR reg >= zero/req.
13299 // Just a wrapper for a normal branch, plus the predicate test
13300 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13301 match(If cmp flags);
13302 effect(USE labl);
13303 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13304 ins_cost(300);
13305 expand %{
13306 jmpCon(cmp, flags, labl); // JGT or JLE...
13307 %}
13308 %}
13309
// Compare 2 longs and CMOVE longs.  The flags were produced by one of the
// swapped-operand LEGT compares above, so only commuted le/gt tests match.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  // A 64-bit move needs two CMOVs on 32-bit x86: low then high halves.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40); // CMOVcc r32,r/m32 (0F 40+cc)
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source form: conditionally loads both halves of the long.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-compare (CmpUL flags) variants: delegate to the signed rules above.
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LEGT(cmp, flags, dst, src);
  %}
%}
13350
// Compare 2 longs and CMOVE ints.  Flags come from a swapped-operand LEGT
// compare, so only commuted le/gt tests are legal here.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40); // CMOVcc r32,r/m32 (0F 40+cc)
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source form of the int CMOV above.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-compare (CmpUL flags) variants: delegate to the signed rules above.
instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LEGT(cmp,flags,dst,src);
  %}
%}
13410
// Compare 2 longs and CMOVE doubles (x87 form, used when SSE2 is unavailable).
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM form, requires SSE2).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, UseSSE==0 only).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


// Compare 2 longs and CMOVE floats (XMM form, requires SSE1).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13449
13450
13451 // ============================================================================
13452 // Procedure Call/Return Instructions
13453 // Call Java Static Instruction
13454 // Note: If this code changes, the corresponding ret_addr_offset() and
13455 // compute_padding() functions will have to be adjusted.
13456 instruct CallStaticJavaDirect(method meth) %{
13457 match(CallStaticJava);
13458 effect(USE meth);
13459
13460 ins_cost(300);
13461 format %{ "CALL,static " %}
13462 opcode(0xE8); /* E8 cd */
13463 ins_encode( pre_call_resets,
13464 Java_Static_Call( meth ),
13465 call_epilog,
13466 post_call_FPU );
13467 ins_pipe( pipe_slow );
13468 ins_alignment(4);
13469 %}
13470
13471 // Call Java Dynamic Instruction
13472 // Note: If this code changes, the corresponding ret_addr_offset() and
13473 // compute_padding() functions will have to be adjusted.
13474 instruct CallDynamicJavaDirect(method meth) %{
13475 match(CallDynamicJava);
13476 effect(USE meth);
13477
13478 ins_cost(300);
13479 format %{ "MOV EAX,(oop)-1\n\t"
13480 "CALL,dynamic" %}
13481 opcode(0xE8); /* E8 cd */
13482 ins_encode( pre_call_resets,
13483 Java_Dynamic_Call( meth ),
13484 call_epilog,
13485 post_call_FPU );
13486 ins_pipe( pipe_slow );
13487 ins_alignment(4);
13488 %}
13489
13490 // Call Runtime Instruction
13491 instruct CallRuntimeDirect(method meth) %{
13492 match(CallRuntime );
13493 effect(USE meth);
13494
13495 ins_cost(300);
13496 format %{ "CALL,runtime " %}
13497 opcode(0xE8); /* E8 cd */
13498 // Use FFREEs to clear entries in float stack
13499 ins_encode( pre_call_resets,
13500 FFree_Float_Stack_All,
13501 Java_To_Runtime( meth ),
13502 post_call_FPU );
13503 ins_pipe( pipe_slow );
13504 %}
13505
13506 // Call runtime without safepoint
13507 instruct CallLeafDirect(method meth) %{
13508 match(CallLeaf);
13509 effect(USE meth);
13510
13511 ins_cost(300);
13512 format %{ "CALL_LEAF,runtime " %}
13513 opcode(0xE8); /* E8 cd */
13514 ins_encode( pre_call_resets,
13515 FFree_Float_Stack_All,
13516 Java_To_Runtime( meth ),
13517 Verify_FPU_For_Leaf, post_call_FPU );
13518 ins_pipe( pipe_slow );
13519 %}
13520
13521 instruct CallLeafNoFPDirect(method meth) %{
13522 match(CallLeafNoFP);
13523 effect(USE meth);
13524
13525 ins_cost(300);
13526 format %{ "CALL_LEAF_NOFP,runtime " %}
13527 opcode(0xE8); /* E8 cd */
13528 ins_encode(pre_call_resets, Java_To_Runtime(meth));
13529 ins_pipe( pipe_slow );
13530 %}
13531
13532
// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3); // near return
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}
13542
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
// Don't use ebp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall which has reset ebp to the caller state.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
  match(TailCall jump_target method_ptr);
  ins_cost(300);
  format %{ "JMP    $jump_target \t# EBX holds method" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */ // indirect near jump
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}
13557
13558
// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
// EAX carries the exception oop; EDX is used only as a scratch to pop the
// return address off the stack.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP    EDX\t# pop return address into dummy\n\t"
            "JMP    $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */ // indirect near jump
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}
13571
// Forward exception.
// Jumps (does not call) to the shared forward-exception stub.
instruct ForwardExceptionjmp()
%{
  match(ForwardException);

  format %{ "JMP    forward_exception_stub" %}
  ins_encode %{
    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
13583
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0); // purely a register-allocation artifact; emits nothing
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}
13597
13598
// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP    rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}
13611
// inlined locking and unlocking

// Fast-path monitor enter for the legacy (non-lightweight) locking modes.
// Sets the condition flags to report lock success/failure to the matcher.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    // 32-bit x86 has no thread register, so fetch the current thread first.
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, $thread$$Register, nullptr);
  %}
  ins_pipe(pipe_slow);
%}

// Fast-path monitor exit for the legacy locking modes; box must be in EAX.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
13639
// Fast-path monitor enter for LM_LIGHTWEIGHT; needs EAX (for CMPXCHG-style
// helpers inside fast_lock_lightweight) plus a scratch register.
instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
  ins_encode %{
    // 32-bit x86 has no thread register, so fetch the current thread first.
    __ get_thread($thread$$Register);
    __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Fast-path monitor exit for LM_LIGHTWEIGHT; the box input doubles as the
// required EAX register and is killed.
instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object eax_reg));
  effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}
13665
// Broadcast a long source into an EVEX opmask register for vectors of up to
// 32 elements (single-step form; longer masks use the GT32 rule below).
instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  // Fixed: format previously printed "mask_all_evexL_LE32", which matches
  // neither this instruct's name nor the naming of the sibling GT32 rules.
  format %{ "mask_all_evexL_LT32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
13676
// Broadcast a long source into an EVEX opmask register when the vector has
// more than 32 elements; needs a temporary opmask register for the two-step
// operation.
instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// Same as above but for an int source.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
13700
13701 // ============================================================================
13702 // Safepoint Instruction
13703 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13704 match(SafePoint poll);
13705 effect(KILL cr, USE poll);
13706
13707 format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
13708 ins_cost(125);
13709 // EBP would need size(3)
13710 size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13711 ins_encode %{
13712 __ set_inst_mark();
13713 __ relocate(relocInfo::poll_type);
13714 __ clear_inst_mark();
13715 address pre_pc = __ pc();
13716 __ testl(rax, Address($poll$$Register, 0));
13717 address post_pc = __ pc();
13718 guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13719 %}
13720 ins_pipe(ialu_reg_mem);
13721 %}
13722
13723
13724 // ============================================================================
13725 // This name is KNOWN by the ADLC and cannot be changed.
13726 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13727 // for this guy.
13728 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13729 match(Set dst (ThreadLocal));
13730 effect(DEF dst, KILL cr);
13731
13732 format %{ "MOV $dst, Thread::current()" %}
13733 ins_encode %{
13734 Register dstReg = as_Register($dst$$reg);
13735 __ get_thread(dstReg);
13736 %}
13737 ins_pipe( ialu_reg_fat );
13738 %}
13739
13740
13741
13742 //----------PEEPHOLE RULES-----------------------------------------------------
13743 // These must follow all instruction definitions as they use the names
13744 // defined in the instructions definitions.
13745 //
13746 // peepmatch ( root_instr_name [preceding_instruction]* );
13747 //
13748 // peepconstraint %{
13749 // (instruction_number.operand_name relational_op instruction_number.operand_name
13750 // [, ...] );
13751 // // instruction numbers are zero-based using left to right order in peepmatch
13752 //
13753 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
13754 // // provide an instruction_number.operand_name for each operand that appears
13755 // // in the replacement instruction's match rule
13756 //
13757 // ---------VM FLAGS---------------------------------------------------------
13758 //
13759 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13760 //
13761 // Each peephole rule is given an identifying number starting with zero and
13762 // increasing by one in the order seen by the parser. An individual peephole
13763 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13764 // on the command-line.
13765 //
13766 // ---------CURRENT LIMITATIONS----------------------------------------------
13767 //
13768 // Only match adjacent instructions in same basic block
13769 // Only equality constraints
13770 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13771 // Only one replacement instruction
13772 //
13773 // ---------EXAMPLE----------------------------------------------------------
13774 //
13775 // // pertinent parts of existing instructions in architecture description
13776 // instruct movI(rRegI dst, rRegI src) %{
13777 // match(Set dst (CopyI src));
13778 // %}
13779 //
13780 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13781 // match(Set dst (AddI dst src));
13782 // effect(KILL cr);
13783 // %}
13784 //
13785 // // Change (inc mov) to lea
13786 // peephole %{
13787 // // increment preceded by register-register move
13788 // peepmatch ( incI_eReg movI );
13789 // // require that the destination register of the increment
13790 // // match the destination register of the move
13791 // peepconstraint ( 0.dst == 1.dst );
13792 // // construct a replacement instruction that sets
13793 // // the destination to ( move's source register + one )
13794 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13795 // %}
13796 //
13797 // Implementation no longer uses movX instructions since
13798 // machine-independent system no longer uses CopyX nodes.
13799 //
13800 // peephole %{
13801 // peepmatch ( incI_eReg movI );
13802 // peepconstraint ( 0.dst == 1.dst );
13803 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13804 // %}
13805 //
13806 // peephole %{
13807 // peepmatch ( decI_eReg movI );
13808 // peepconstraint ( 0.dst == 1.dst );
13809 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13810 // %}
13811 //
13812 // peephole %{
13813 // peepmatch ( addI_eReg_imm movI );
13814 // peepconstraint ( 0.dst == 1.dst );
13815 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13816 // %}
13817 //
13818 // peephole %{
13819 // peepmatch ( addP_eReg_imm movP );
13820 // peepconstraint ( 0.dst == 1.dst );
13821 // peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13822 // %}
13823
13824 // // Change load of spilled value to only a spill
13825 // instruct storeI(memory mem, rRegI src) %{
13826 // match(Set mem (StoreI mem src));
13827 // %}
13828 //
13829 // instruct loadI(rRegI dst, memory mem) %{
13830 // match(Set dst (LoadI mem));
13831 // %}
13832 //
// Peephole: a load that immediately follows a store of the same value to the
// same location is redundant; replace the pair with just the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}
13838
13839 //----------SMARTSPILL RULES---------------------------------------------------
13840 // These must follow all instruction definitions as they use the names
13841 // defined in the instructions definitions.
--- EOF ---