1 //
2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // AMD64 (x86_64) Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding, concrete register );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
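//
// For example, the first definition below can be read field-by-field
// (a descriptive annotation of the existing RAX entry, not a new definition):
//
//   reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
//     - SOC               : save-on-call as seen by the register allocator
//     - SOC               : save-on-call under the C calling convention
//     - Op_RegI           : spilled/reloaded with LoadI/StoreI
//     - 0                 : encoding bit-pattern placed into opcodes
//     - rax->as_VMReg()   : the concrete VMReg backing this name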
61
62 // General Registers
63 // R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
64 // used as byte registers)
65
66 // Previously RBX, RSI, and RDI were set as save-on-entry for java code.
67 // SOE was turned off in java-code due to frequent use of uncommon-traps.
68 // Now that the allocator is better, RSI and RDI are turned back on as SOE registers.
69
70 reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
71 reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());
72
73 reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
74 reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());
75
76 reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
77 reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());
78
79 reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
80 reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());
81
82 reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
83 reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());
84
85 // Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
86 reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
87 reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());
88
89 #ifdef _WIN64
90
91 reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
92 reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());
93
94 reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
95 reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());
96
97 #else
98
99 reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
100 reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());
101
102 reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
103 reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());
104
105 #endif
106
107 reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
108 reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());
109
110 reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
111 reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());
112
113 reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
115
116 reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
118
119 reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
121
122 reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
124
125 reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
127
128 reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
130
131 reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
133
134 reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
136
137 reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
139
140 reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
142
143 reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
145
146 reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
148
149 reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
151
152 reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
154
155 reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
157
158 reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
160
161 reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
163
164 reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
166
167 reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
169
170 reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
172
173 reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
175
176 reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
178
179 // Floating Point Registers
180
181 // Specify priority of register selection within phases of register
182 // allocation. Highest priority is first. A useful heuristic is to
183 // give registers a low priority when they are required by machine
184 // instructions, like EAX and EDX on I486, and choose no-save registers
185 // before save-on-call, & save-on-call before save-on-entry. Registers
186 // which participate in fixed calling sequences should come last.
187 // Registers which are used as pairs must fall on an even boundary.
188
189 alloc_class chunk0(R10, R10_H,
190 R11, R11_H,
191 R8, R8_H,
192 R9, R9_H,
193 R12, R12_H,
194 RCX, RCX_H,
195 RBX, RBX_H,
196 RDI, RDI_H,
197 RDX, RDX_H,
198 RSI, RSI_H,
199 RAX, RAX_H,
200 RBP, RBP_H,
201 R13, R13_H,
202 R14, R14_H,
203 R15, R15_H,
204 R16, R16_H,
205 R17, R17_H,
206 R18, R18_H,
207 R19, R19_H,
208 R20, R20_H,
209 R21, R21_H,
210 R22, R22_H,
211 R23, R23_H,
212 R24, R24_H,
213 R25, R25_H,
214 R26, R26_H,
215 R27, R27_H,
216 R28, R28_H,
217 R29, R29_H,
218 R30, R30_H,
219 R31, R31_H,
220 RSP, RSP_H);
221
222 // XMM registers. 512-bit registers, 16 words each, labeled (a)-p.
223 // Word a in each register holds a Float, words ab hold a Double.
224 // The whole registers are used by SSE4.2 intrinsics, the array copy
225 // stubs and superword operations (see UseSSE42Intrinsics,
226 // UseXMMForArrayCopy and UseSuperWord flags).
227 // For pre-EVEX architectures:
228 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
229 // For EVEX-enabled architectures:
230 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
231 //
232 // Linux ABI: No registers preserved across function calls
233 // XMM0-XMM7 might hold parameters
234 // Windows ABI: XMM6-XMM15 preserved across function calls
235 // XMM0-XMM3 might hold parameters
236
237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
253
254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
270
271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
287
288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
304
305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
321
322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
338
339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
355
356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
372
373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
389
390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
406
407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
423
424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
440
441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
457
458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
474
475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
491
492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
508
509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
525
526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
542
543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
559
560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
576
577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
593
594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
610
611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
627
628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
644
645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
661
662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
678
679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
695
696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
712
713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
729
730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
746
747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
763
764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
780
781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
782
783 // AVX3 Mask Registers.
784 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
785 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());
786
787 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
788 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());
789
790 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
791 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());
792
793 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
794 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());
795
796 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
797 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());
798
799 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
800 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());
801
802 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
803 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
804
805
806 //----------Architecture Description Register Classes--------------------------
807 // Several register classes are automatically defined based upon information in
808 // this architecture description.
809 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
811 //
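//
// Two forms of reg_class definition appear below; noted here only as a
// reading aid (the entries themselves are unchanged):
//
//   reg_class ptr_rax_reg(RAX, RAX_H);   // membership listed literally
//
//   reg_class ptr_reg %{                 // membership computed at runtime:
//     return _PTR_REG_mask;              // the C++ body returns a RegMask
//   %}                                   // (mask assumed to be populated in the
//                                        //  source blocks elsewhere in this file)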
812
813 // Empty register class.
814 reg_class no_reg();
815
816 // Class for all pointer/long registers including APX extended GPRs.
817 reg_class all_reg(RAX, RAX_H,
818 RDX, RDX_H,
819 RBP, RBP_H,
820 RDI, RDI_H,
821 RSI, RSI_H,
822 RCX, RCX_H,
823 RBX, RBX_H,
824 RSP, RSP_H,
825 R8, R8_H,
826 R9, R9_H,
827 R10, R10_H,
828 R11, R11_H,
829 R12, R12_H,
830 R13, R13_H,
831 R14, R14_H,
832 R15, R15_H,
833 R16, R16_H,
834 R17, R17_H,
835 R18, R18_H,
836 R19, R19_H,
837 R20, R20_H,
838 R21, R21_H,
839 R22, R22_H,
840 R23, R23_H,
841 R24, R24_H,
842 R25, R25_H,
843 R26, R26_H,
844 R27, R27_H,
845 R28, R28_H,
846 R29, R29_H,
847 R30, R30_H,
848 R31, R31_H);
849
850 // Class for all int registers including APX extended GPRs.
851 reg_class all_int_reg(RAX,
852 RDX,
853 RBP,
854 RDI,
855 RSI,
856 RCX,
857 RBX,
858 R8,
859 R9,
860 R10,
861 R11,
862 R12,
863 R13,
864 R14,
865 R16,
866 R17,
867 R18,
868 R19,
869 R20,
870 R21,
871 R22,
872 R23,
873 R24,
874 R25,
875 R26,
876 R27,
877 R28,
878 R29,
879 R30,
880 R31);
881
882 // Class for all pointer registers
883 reg_class any_reg %{
884 return _ANY_REG_mask;
885 %}
886
887 // Class for all pointer registers (excluding RSP)
888 reg_class ptr_reg %{
889 return _PTR_REG_mask;
890 %}
891
892 // Class for all pointer registers (excluding RSP and RBP)
893 reg_class ptr_reg_no_rbp %{
894 return _PTR_REG_NO_RBP_mask;
895 %}
896
897 // Class for all pointer registers (excluding RAX and RSP)
898 reg_class ptr_no_rax_reg %{
899 return _PTR_NO_RAX_REG_mask;
900 %}
901
902 // Class for all pointer registers (excluding RAX, RBX, and RSP)
903 reg_class ptr_no_rax_rbx_reg %{
904 return _PTR_NO_RAX_RBX_REG_mask;
905 %}
906
907 // Class for all long registers (excluding RSP)
908 reg_class long_reg %{
909 return _LONG_REG_mask;
910 %}
911
912 // Class for all long registers (excluding RAX, RDX and RSP)
913 reg_class long_no_rax_rdx_reg %{
914 return _LONG_NO_RAX_RDX_REG_mask;
915 %}
916
917 // Class for all long registers (excluding RCX and RSP)
918 reg_class long_no_rcx_reg %{
919 return _LONG_NO_RCX_REG_mask;
920 %}
921
922 // Class for all long registers (excluding RBP and R13)
923 reg_class long_no_rbp_r13_reg %{
924 return _LONG_NO_RBP_R13_REG_mask;
925 %}
926
927 // Class for all int registers (excluding RSP)
928 reg_class int_reg %{
929 return _INT_REG_mask;
930 %}
931
932 // Class for all int registers (excluding RAX, RDX, and RSP)
933 reg_class int_no_rax_rdx_reg %{
934 return _INT_NO_RAX_RDX_REG_mask;
935 %}
936
937 // Class for all int registers (excluding RCX and RSP)
938 reg_class int_no_rcx_reg %{
939 return _INT_NO_RCX_REG_mask;
940 %}
941
942 // Class for all int registers (excluding RBP and R13)
943 reg_class int_no_rbp_r13_reg %{
944 return _INT_NO_RBP_R13_REG_mask;
945 %}
946
947 // Singleton class for RAX pointer register
948 reg_class ptr_rax_reg(RAX, RAX_H);
949
950 // Singleton class for RBX pointer register
951 reg_class ptr_rbx_reg(RBX, RBX_H);
952
953 // Singleton class for RSI pointer register
954 reg_class ptr_rsi_reg(RSI, RSI_H);
955
956 // Singleton class for RBP pointer register
957 reg_class ptr_rbp_reg(RBP, RBP_H);
958
959 // Singleton class for RDI pointer register
960 reg_class ptr_rdi_reg(RDI, RDI_H);
961
962 // Singleton class for stack pointer
963 reg_class ptr_rsp_reg(RSP, RSP_H);
964
965 // Singleton class for TLS pointer
966 reg_class ptr_r15_reg(R15, R15_H);
967
968 // Singleton class for RAX long register
969 reg_class long_rax_reg(RAX, RAX_H);
970
971 // Singleton class for RCX long register
972 reg_class long_rcx_reg(RCX, RCX_H);
973
974 // Singleton class for RDX long register
975 reg_class long_rdx_reg(RDX, RDX_H);
976
977 // Singleton class for R11 long register
978 reg_class long_r11_reg(R11, R11_H);
979
980 // Singleton class for RAX int register
981 reg_class int_rax_reg(RAX);
982
983 // Singleton class for RBX int register
984 reg_class int_rbx_reg(RBX);
985
986 // Singleton class for RCX int register
987 reg_class int_rcx_reg(RCX);
988
989 // Singleton class for RDX int register
990 reg_class int_rdx_reg(RDX);
991
992 // Singleton class for RDI int register
993 reg_class int_rdi_reg(RDI);
994
995 // Singleton class for instruction pointer
996 // reg_class ip_reg(RIP);
997
998 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
999 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1000 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1001 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1002 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1003 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1004 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1005 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1006 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1007 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1008 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1009 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1010 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1011 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1012 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1013 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1014 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1015 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1016 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1017 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1018 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1019 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1020 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1021 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1022 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1023 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1024 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1025 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1026 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1027 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1028 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1029 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1030
1031 alloc_class chunk2(K7, K7_H,
1032 K6, K6_H,
1033 K5, K5_H,
1034 K4, K4_H,
1035 K3, K3_H,
1036 K2, K2_H,
1037 K1, K1_H);
1038
1039 reg_class vectmask_reg(K1, K1_H,
1040 K2, K2_H,
1041 K3, K3_H,
1042 K4, K4_H,
1043 K5, K5_H,
1044 K6, K6_H,
1045 K7, K7_H);
1046
1047 reg_class vectmask_reg_K1(K1, K1_H);
1048 reg_class vectmask_reg_K2(K2, K2_H);
1049 reg_class vectmask_reg_K3(K3, K3_H);
1050 reg_class vectmask_reg_K4(K4, K4_H);
1051 reg_class vectmask_reg_K5(K5, K5_H);
1052 reg_class vectmask_reg_K6(K6, K6_H);
1053 reg_class vectmask_reg_K7(K7, K7_H);
1054
1055 // flags allocation class should be last.
1056 alloc_class chunk3(RFLAGS);
1057
1058 // Singleton class for condition codes
1059 reg_class int_flags(RFLAGS);
1060
1061 // Class for pre-EVEX float registers
1062 reg_class float_reg_legacy(XMM0,
1063 XMM1,
1064 XMM2,
1065 XMM3,
1066 XMM4,
1067 XMM5,
1068 XMM6,
1069 XMM7,
1070 XMM8,
1071 XMM9,
1072 XMM10,
1073 XMM11,
1074 XMM12,
1075 XMM13,
1076 XMM14,
1077 XMM15);
1078
1079 // Class for EVEX float registers
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
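// reg_class_dynamic selects between two statically defined register classes at
// runtime: the first (EVEX) class is used when the predicate evaluates to true,
// otherwise the second (legacy) class is used.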
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
1116 // Class for pre evex double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
1134 // Class for evex double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
1171 // Class for pre evex 32bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
1189 // Class for evex 32bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre evex 64bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
// Class for evex 64bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre evex 128bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for evex 128bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre evex 256bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for evex 256bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
1391 // Class for all 512bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
1425 // Class for restricted 512bit vector registers
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
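// Returns true when both bounds of the CastLL's long type fit in a signed
// 32-bit immediate; a bound equal to min_jlong/max_jlong is treated as
// "unbounded" and is not required to fit.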
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
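// r12 holds the compressed oops heap base, so it must be kept out of the
// allocatable masks whenever compressed oops are in use.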
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
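  // APX extended GPRs (r16-r31); these are removed from the allocation masks
  // below unless UseAPX is enabled.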
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
1626 static bool generate_vzeroupper(Compile* C) {
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx()); // Generate vzeroupper
1628 }
1629
1630 static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper is a 3-byte instruction
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
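  // (call rel32: 1 opcode byte + 4 displacement bytes)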
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
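  // (typically movq rax, #Universe::non_oop_word() - a 10-byte move - followed
  //  by a 5-byte call rel32)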
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
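  // (movq r10, imm64 is 10 bytes; the indirect call through r10 is 3 bytes)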
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1678 }
1679
1680 // This could be in MacroAssembler but it's fairly C2 specific
1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1682 Label exit;
1683 __ jccb(Assembler::noParity, exit);
1684 __ pushf();
1685 //
1686 // comiss/ucomiss instructions set ZF,PF,CF flags and
1687 // zero OF,AF,SF for NaN values.
1688 // Fixup flags by zeroing ZF,PF so that compare of NaN
1689 // values returns 'less than' result (CF is set).
1690 // Leave the rest of flags unchanged.
1691 //
1692 // 7 6 5 4 3 2 1 0
1693 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1694 // 0 0 1 0 1 0 1 1 (0x2B)
1695 //
1696 __ andq(Address(rsp, 0), 0xffffff2b);
1697 __ popf();
1698 __ bind(exit);
1699 }
1700
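// Materializes a three-way compare result in 'dst': -1 if the first operand is
// below the second or the comparison is unordered (NaN), 0 if equal, 1 if above.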
1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  // For floating point comparisons the unordered case (at least one input is NaN)
  // sets CF=1, so the 'below' branch is also taken when an input is NaN.
1704 Label done;
1705 __ movl(dst, -1);
1706 __ jcc(Assembler::below, done);
1707 __ setcc(Assembler::notEqual, dst);
1708 __ bind(done);
1709 }
1710
1711 // Math.min() # Math.max()
1712 // --------------------------
1713 // ucomis[s/d] #
1714 // ja -> b # a
1715 // jp -> NaN # NaN
1716 // jb -> a # b
1717 // je #
1718 // |-jz -> a | b # a & b
1719 // | -> a #
1720 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1721 XMMRegister a, XMMRegister b,
1722 XMMRegister xmmt, Register rt,
1723 bool min, bool single) {
1724
1725 Label nan, zero, below, above, done;
1726
1727 if (single)
1728 __ ucomiss(a, b);
1729 else
1730 __ ucomisd(a, b);
1731
1732 if (dst->encoding() != (min ? b : a)->encoding())
1733 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1734 else
1735 __ jccb(Assembler::above, done);
1736
1737 __ jccb(Assembler::parity, nan); // PF=1
1738 __ jccb(Assembler::below, below); // CF=1
1739
1740 // equal
1741 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1742 if (single) {
1743 __ ucomiss(a, xmmt);
1744 __ jccb(Assembler::equal, zero);
1745
1746 __ movflt(dst, a);
1747 __ jmp(done);
1748 }
1749 else {
1750 __ ucomisd(a, xmmt);
1751 __ jccb(Assembler::equal, zero);
1752
1753 __ movdbl(dst, a);
1754 __ jmp(done);
1755 }
1756
1757 __ bind(zero);
1758 if (min)
1759 __ vpor(dst, a, b, Assembler::AVX_128bit);
1760 else
1761 __ vpand(dst, a, b, Assembler::AVX_128bit);
1762
1763 __ jmp(done);
1764
1765 __ bind(above);
1766 if (single)
1767 __ movflt(dst, min ? b : a);
1768 else
1769 __ movdbl(dst, min ? b : a);
1770
1771 __ jmp(done);
1772
1773 __ bind(nan);
1774 if (single) {
1775 __ movl(rt, 0x7fc00000); // Float.NaN
1776 __ movdl(dst, rt);
1777 }
1778 else {
1779 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1780 __ movdq(dst, rt);
1781 }
1782 __ jmp(done);
1783
1784 __ bind(below);
1785 if (single)
1786 __ movflt(dst, min ? a : b);
1787 else
1788 __ movdbl(dst, min ? a : b);
1789
1790 __ bind(done);
1791 }
1792
1793 //=============================================================================
1794 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1795
1796 int ConstantTable::calculate_table_base_offset() const {
1797 return 0; // absolute addressing, no offset
1798 }
1799
1800 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1801 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1802 ShouldNotReachHere();
1803 }
1804
1805 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1806 // Empty encoding
1807 }
1808
1809 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1810 return 0;
1811 }
1812
1813 #ifndef PRODUCT
1814 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1815 st->print("# MachConstantBaseNode (empty encoding)");
1816 }
1817 #endif
1818
1819
1820 //=============================================================================
1821 #ifndef PRODUCT
1822 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1823 Compile* C = ra_->C;
1824
1825 int framesize = C->output()->frame_size_in_bytes();
1826 int bangsize = C->output()->bang_size_in_bytes();
1827 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1828 // Remove wordSize for return addr which is already pushed.
1829 framesize -= wordSize;
1830
1831 if (C->output()->need_stack_bang(bangsize)) {
1832 framesize -= wordSize;
1833 st->print("# stack bang (%d bytes)", bangsize);
1834 st->print("\n\t");
1835 st->print("pushq rbp\t# Save rbp");
1836 if (PreserveFramePointer) {
1837 st->print("\n\t");
1838 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1839 }
1840 if (framesize) {
1841 st->print("\n\t");
1842 st->print("subq rsp, #%d\t# Create frame",framesize);
1843 }
1844 } else {
1845 st->print("subq rsp, #%d\t# Create frame",framesize);
1846 st->print("\n\t");
1847 framesize -= wordSize;
1848 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1849 if (PreserveFramePointer) {
1850 st->print("\n\t");
1851 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1852 if (framesize > 0) {
1853 st->print("\n\t");
1854 st->print("addq rbp, #%d", framesize);
1855 }
1856 }
1857 }
1858
1859 if (VerifyStackAtCalls) {
1860 st->print("\n\t");
1861 framesize -= wordSize;
1862 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1863 #ifdef ASSERT
1864 st->print("\n\t");
1865 st->print("# stack alignment check");
1866 #endif
1867 }
1868 if (C->stub_function() != nullptr) {
1869 st->print("\n\t");
1870 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1871 st->print("\n\t");
1872 st->print("je fast_entry\t");
1873 st->print("\n\t");
1874 st->print("call #nmethod_entry_barrier_stub\t");
1875 st->print("\n\tfast_entry:");
1876 }
1877 st->cr();
1878 }
1879 #endif
1880
1881 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1882 Compile* C = ra_->C;
1883
1884 int framesize = C->output()->frame_size_in_bytes();
1885 int bangsize = C->output()->bang_size_in_bytes();
1886
1887 if (C->clinit_barrier_on_entry()) {
1888 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1889 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1890
1891 Label L_skip_barrier;
1892 Register klass = rscratch1;
1893
1894 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1895 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1896
1897 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1898
1899 __ bind(L_skip_barrier);
1900 }
1901
1902 __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
1903
1904 C->output()->set_frame_complete(__ offset());
1905
1906 if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users of the constant
    // table might be emitted before MachConstantBaseNode.
1909 ConstantTable& constant_table = C->output()->constant_table();
1910 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1911 }
1912 }
1913
1914 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1915 {
1916 return MachNode::size(ra_); // too many variables; just compute it
1917 // the hard way
1918 }
1919
1920 int MachPrologNode::reloc() const
1921 {
1922 return 0; // a large enough number
1923 }
1924
1925 //=============================================================================
1926 #ifndef PRODUCT
1927 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1928 {
1929 Compile* C = ra_->C;
1930 if (generate_vzeroupper(C)) {
1931 st->print("vzeroupper");
1932 st->cr(); st->print("\t");
1933 }
1934
1935 int framesize = C->output()->frame_size_in_bytes();
1936 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1937 // Remove word for return adr already pushed
1938 // and RBP
1939 framesize -= 2*wordSize;
1940
1941 if (framesize) {
1942 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1943 st->print("\t");
1944 }
1945
1946 st->print_cr("popq rbp");
1947 if (do_polling() && C->is_method_compilation()) {
1948 st->print("\t");
1949 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1950 "ja #safepoint_stub\t"
1951 "# Safepoint: poll for GC");
1952 }
1953 }
1954 #endif
1955
1956 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1957 {
1958 Compile* C = ra_->C;
1959
1960 if (generate_vzeroupper(C)) {
1961 // Clear upper bits of YMM registers when current compiled code uses
1962 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1963 __ vzeroupper();
1964 }
1965
1966 int framesize = C->output()->frame_size_in_bytes();
1967 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1968 // Remove word for return adr already pushed
1969 // and RBP
1970 framesize -= 2*wordSize;
1971
1972 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1973
1974 if (framesize) {
1975 __ addq(rsp, framesize);
1976 }
1977
1978 __ popq(rbp);
1979
1980 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1981 __ reserved_stack_check();
1982 }
1983
1984 if (do_polling() && C->is_method_compilation()) {
1985 Label dummy_label;
1986 Label* code_stub = &dummy_label;
1987 if (!C->output()->in_scratch_emit_size()) {
1988 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1989 C->output()->add_stub(stub);
1990 code_stub = &stub->entry();
1991 }
1992 __ relocate(relocInfo::poll_return_type);
1993 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1994 }
1995 }
1996
1997 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1998 {
1999 return MachNode::size(ra_); // too many variables; just compute it
2000 // the hard way
2001 }
2002
2003 int MachEpilogNode::reloc() const
2004 {
2005 return 2; // a large enough number
2006 }
2007
2008 const Pipeline* MachEpilogNode::pipeline() const
2009 {
2010 return MachNode::pipeline_class();
2011 }
2012
2013 //=============================================================================
2014
2015 enum RC {
2016 rc_bad,
2017 rc_int,
2018 rc_kreg,
2019 rc_float,
2020 rc_stack
2021 };
2022
2023 static enum RC rc_class(OptoReg::Name reg)
2024 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
2026
2027 if (OptoReg::is_stack(reg)) return rc_stack;
2028
2029 VMReg r = OptoReg::as_VMReg(reg);
2030
2031 if (r->is_Register()) return rc_int;
2032
2033 if (r->is_KRegister()) return rc_kreg;
2034
2035 assert(r->is_XMMRegister(), "must be");
2036 return rc_float;
2037 }
2038
2039 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
2040 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2041 int src_hi, int dst_hi, uint ireg, outputStream* st);
2042
2043 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2044 int stack_offset, int reg, uint ireg, outputStream* st);
2045
2046 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2047 int dst_offset, uint ireg, outputStream* st) {
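  // Mem-to-mem vector copies go through a scratch register (rax or xmm0), which
  // is preserved by parking it in the unused stack area just below rsp.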
2048 if (masm) {
2049 switch (ireg) {
2050 case Op_VecS:
2051 __ movq(Address(rsp, -8), rax);
2052 __ movl(rax, Address(rsp, src_offset));
2053 __ movl(Address(rsp, dst_offset), rax);
2054 __ movq(rax, Address(rsp, -8));
2055 break;
2056 case Op_VecD:
2057 __ pushq(Address(rsp, src_offset));
2058 __ popq (Address(rsp, dst_offset));
2059 break;
2060 case Op_VecX:
2061 __ pushq(Address(rsp, src_offset));
2062 __ popq (Address(rsp, dst_offset));
2063 __ pushq(Address(rsp, src_offset+8));
2064 __ popq (Address(rsp, dst_offset+8));
2065 break;
2066 case Op_VecY:
2067 __ vmovdqu(Address(rsp, -32), xmm0);
2068 __ vmovdqu(xmm0, Address(rsp, src_offset));
2069 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2070 __ vmovdqu(xmm0, Address(rsp, -32));
2071 break;
2072 case Op_VecZ:
2073 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2074 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2075 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2076 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2077 break;
2078 default:
2079 ShouldNotReachHere();
2080 }
2081 #ifndef PRODUCT
2082 } else {
2083 switch (ireg) {
2084 case Op_VecS:
2085 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2086 "movl rax, [rsp + #%d]\n\t"
2087 "movl [rsp + #%d], rax\n\t"
2088 "movq rax, [rsp - #8]",
2089 src_offset, dst_offset);
2090 break;
2091 case Op_VecD:
2092 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2093 "popq [rsp + #%d]",
2094 src_offset, dst_offset);
2095 break;
2096 case Op_VecX:
2097 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2098 "popq [rsp + #%d]\n\t"
2099 "pushq [rsp + #%d]\n\t"
2100 "popq [rsp + #%d]",
2101 src_offset, dst_offset, src_offset+8, dst_offset+8);
2102 break;
2103 case Op_VecY:
2104 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2105 "vmovdqu xmm0, [rsp + #%d]\n\t"
2106 "vmovdqu [rsp + #%d], xmm0\n\t"
2107 "vmovdqu xmm0, [rsp - #32]",
2108 src_offset, dst_offset);
2109 break;
2110 case Op_VecZ:
2111 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2112 "vmovdqu xmm0, [rsp + #%d]\n\t"
2113 "vmovdqu [rsp + #%d], xmm0\n\t"
2114 "vmovdqu xmm0, [rsp - #64]",
2115 src_offset, dst_offset);
2116 break;
2117 default:
2118 ShouldNotReachHere();
2119 }
2120 #endif
2121 }
2122 }
2123
2124 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2125 PhaseRegAlloc* ra_,
2126 bool do_size,
2127 outputStream* st) const {
2128 assert(masm != nullptr || st != nullptr, "sanity");
2129 // Get registers to move
2130 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2131 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2132 OptoReg::Name dst_second = ra_->get_reg_second(this);
2133 OptoReg::Name dst_first = ra_->get_reg_first(this);
2134
2135 enum RC src_second_rc = rc_class(src_second);
2136 enum RC src_first_rc = rc_class(src_first);
2137 enum RC dst_second_rc = rc_class(dst_second);
2138 enum RC dst_first_rc = rc_class(dst_first);
2139
2140 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2141 "must move at least 1 register" );
2142
2143 if (src_first == dst_first && src_second == dst_second) {
2144 // Self copy, no move
2145 return 0;
2146 }
2147 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2148 uint ireg = ideal_reg();
2149 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2150 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
2152 // mem -> mem
2153 int src_offset = ra_->reg2offset(src_first);
2154 int dst_offset = ra_->reg2offset(dst_first);
2155 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
2156 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
2157 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
2158 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
2159 int stack_offset = ra_->reg2offset(dst_first);
2160 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
2161 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
2162 int stack_offset = ra_->reg2offset(src_first);
2163 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2164 } else {
2165 ShouldNotReachHere();
2166 }
2167 return 0;
2168 }
2169 if (src_first_rc == rc_stack) {
2170 // mem ->
2171 if (dst_first_rc == rc_stack) {
2172 // mem -> mem
2173 assert(src_second != dst_first, "overlap");
2174 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2175 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2176 // 64-bit
2177 int src_offset = ra_->reg2offset(src_first);
2178 int dst_offset = ra_->reg2offset(dst_first);
2179 if (masm) {
2180 __ pushq(Address(rsp, src_offset));
2181 __ popq (Address(rsp, dst_offset));
2182 #ifndef PRODUCT
2183 } else {
2184 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2185 "popq [rsp + #%d]",
2186 src_offset, dst_offset);
2187 #endif
2188 }
2189 } else {
2190 // 32-bit
2191 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2192 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2193 // No pushl/popl, so:
2194 int src_offset = ra_->reg2offset(src_first);
2195 int dst_offset = ra_->reg2offset(dst_first);
2196 if (masm) {
2197 __ movq(Address(rsp, -8), rax);
2198 __ movl(rax, Address(rsp, src_offset));
2199 __ movl(Address(rsp, dst_offset), rax);
2200 __ movq(rax, Address(rsp, -8));
2201 #ifndef PRODUCT
2202 } else {
2203 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2204 "movl rax, [rsp + #%d]\n\t"
2205 "movl [rsp + #%d], rax\n\t"
2206 "movq rax, [rsp - #8]",
2207 src_offset, dst_offset);
2208 #endif
2209 }
2210 }
2211 return 0;
2212 } else if (dst_first_rc == rc_int) {
2213 // mem -> gpr
2214 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2215 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2216 // 64-bit
2217 int offset = ra_->reg2offset(src_first);
2218 if (masm) {
2219 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2220 #ifndef PRODUCT
2221 } else {
2222 st->print("movq %s, [rsp + #%d]\t# spill",
2223 Matcher::regName[dst_first],
2224 offset);
2225 #endif
2226 }
2227 } else {
2228 // 32-bit
2229 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2230 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2231 int offset = ra_->reg2offset(src_first);
2232 if (masm) {
2233 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2234 #ifndef PRODUCT
2235 } else {
2236 st->print("movl %s, [rsp + #%d]\t# spill",
2237 Matcher::regName[dst_first],
2238 offset);
2239 #endif
2240 }
2241 }
2242 return 0;
2243 } else if (dst_first_rc == rc_float) {
      // mem -> xmm
2245 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2246 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2247 // 64-bit
2248 int offset = ra_->reg2offset(src_first);
2249 if (masm) {
2250 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2251 #ifndef PRODUCT
2252 } else {
2253 st->print("%s %s, [rsp + #%d]\t# spill",
2254 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2255 Matcher::regName[dst_first],
2256 offset);
2257 #endif
2258 }
2259 } else {
2260 // 32-bit
2261 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2262 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2263 int offset = ra_->reg2offset(src_first);
2264 if (masm) {
2265 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2266 #ifndef PRODUCT
2267 } else {
2268 st->print("movss %s, [rsp + #%d]\t# spill",
2269 Matcher::regName[dst_first],
2270 offset);
2271 #endif
2272 }
2273 }
2274 return 0;
2275 } else if (dst_first_rc == rc_kreg) {
2276 // mem -> kreg
2277 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2278 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2279 // 64-bit
2280 int offset = ra_->reg2offset(src_first);
2281 if (masm) {
2282 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2283 #ifndef PRODUCT
2284 } else {
2285 st->print("kmovq %s, [rsp + #%d]\t# spill",
2286 Matcher::regName[dst_first],
2287 offset);
2288 #endif
2289 }
2290 }
2291 return 0;
2292 }
2293 } else if (src_first_rc == rc_int) {
2294 // gpr ->
2295 if (dst_first_rc == rc_stack) {
2296 // gpr -> mem
2297 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2298 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2299 // 64-bit
2300 int offset = ra_->reg2offset(dst_first);
2301 if (masm) {
2302 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2303 #ifndef PRODUCT
2304 } else {
2305 st->print("movq [rsp + #%d], %s\t# spill",
2306 offset,
2307 Matcher::regName[src_first]);
2308 #endif
2309 }
2310 } else {
2311 // 32-bit
2312 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2313 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2314 int offset = ra_->reg2offset(dst_first);
2315 if (masm) {
2316 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2317 #ifndef PRODUCT
2318 } else {
2319 st->print("movl [rsp + #%d], %s\t# spill",
2320 offset,
2321 Matcher::regName[src_first]);
2322 #endif
2323 }
2324 }
2325 return 0;
2326 } else if (dst_first_rc == rc_int) {
2327 // gpr -> gpr
2328 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2329 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2330 // 64-bit
2331 if (masm) {
2332 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2333 as_Register(Matcher::_regEncode[src_first]));
2334 #ifndef PRODUCT
2335 } else {
2336 st->print("movq %s, %s\t# spill",
2337 Matcher::regName[dst_first],
2338 Matcher::regName[src_first]);
2339 #endif
2340 }
2341 return 0;
2342 } else {
2343 // 32-bit
2344 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2345 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2346 if (masm) {
2347 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2348 as_Register(Matcher::_regEncode[src_first]));
2349 #ifndef PRODUCT
2350 } else {
2351 st->print("movl %s, %s\t# spill",
2352 Matcher::regName[dst_first],
2353 Matcher::regName[src_first]);
2354 #endif
2355 }
2356 return 0;
2357 }
2358 } else if (dst_first_rc == rc_float) {
2359 // gpr -> xmm
2360 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2361 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2362 // 64-bit
2363 if (masm) {
2364 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2365 #ifndef PRODUCT
2366 } else {
2367 st->print("movdq %s, %s\t# spill",
2368 Matcher::regName[dst_first],
2369 Matcher::regName[src_first]);
2370 #endif
2371 }
2372 } else {
2373 // 32-bit
2374 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2375 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2376 if (masm) {
2377 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2378 #ifndef PRODUCT
2379 } else {
2380 st->print("movdl %s, %s\t# spill",
2381 Matcher::regName[dst_first],
2382 Matcher::regName[src_first]);
2383 #endif
2384 }
2385 }
2386 return 0;
2387 } else if (dst_first_rc == rc_kreg) {
2388 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2389 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2390 // 64-bit
2391 if (masm) {
2392 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2393 #ifndef PRODUCT
2394 } else {
2395 st->print("kmovq %s, %s\t# spill",
2396 Matcher::regName[dst_first],
2397 Matcher::regName[src_first]);
2398 #endif
2399 }
2400 }
2401 Unimplemented();
2402 return 0;
2403 }
2404 } else if (src_first_rc == rc_float) {
2405 // xmm ->
2406 if (dst_first_rc == rc_stack) {
2407 // xmm -> mem
2408 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2409 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2410 // 64-bit
2411 int offset = ra_->reg2offset(dst_first);
2412 if (masm) {
2413 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2414 #ifndef PRODUCT
2415 } else {
2416 st->print("movsd [rsp + #%d], %s\t# spill",
2417 offset,
2418 Matcher::regName[src_first]);
2419 #endif
2420 }
2421 } else {
2422 // 32-bit
2423 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2424 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2425 int offset = ra_->reg2offset(dst_first);
2426 if (masm) {
2427 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2428 #ifndef PRODUCT
2429 } else {
2430 st->print("movss [rsp + #%d], %s\t# spill",
2431 offset,
2432 Matcher::regName[src_first]);
2433 #endif
2434 }
2435 }
2436 return 0;
2437 } else if (dst_first_rc == rc_int) {
2438 // xmm -> gpr
2439 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2440 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2441 // 64-bit
2442 if (masm) {
2443 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2444 #ifndef PRODUCT
2445 } else {
2446 st->print("movdq %s, %s\t# spill",
2447 Matcher::regName[dst_first],
2448 Matcher::regName[src_first]);
2449 #endif
2450 }
2451 } else {
2452 // 32-bit
2453 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2454 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2455 if (masm) {
2456 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2457 #ifndef PRODUCT
2458 } else {
2459 st->print("movdl %s, %s\t# spill",
2460 Matcher::regName[dst_first],
2461 Matcher::regName[src_first]);
2462 #endif
2463 }
2464 }
2465 return 0;
2466 } else if (dst_first_rc == rc_float) {
2467 // xmm -> xmm
2468 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2469 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2470 // 64-bit
2471 if (masm) {
2472 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2473 #ifndef PRODUCT
2474 } else {
2475 st->print("%s %s, %s\t# spill",
2476 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2477 Matcher::regName[dst_first],
2478 Matcher::regName[src_first]);
2479 #endif
2480 }
2481 } else {
2482 // 32-bit
2483 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2484 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2485 if (masm) {
2486 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2487 #ifndef PRODUCT
2488 } else {
2489 st->print("%s %s, %s\t# spill",
2490 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2491 Matcher::regName[dst_first],
2492 Matcher::regName[src_first]);
2493 #endif
2494 }
2495 }
2496 return 0;
2497 } else if (dst_first_rc == rc_kreg) {
2498 assert(false, "Illegal spilling");
2499 return 0;
2500 }
2501 } else if (src_first_rc == rc_kreg) {
2502 if (dst_first_rc == rc_stack) {
      // kreg -> mem
2504 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2505 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2506 // 64-bit
2507 int offset = ra_->reg2offset(dst_first);
2508 if (masm) {
2509 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2510 #ifndef PRODUCT
2511 } else {
2512 st->print("kmovq [rsp + #%d] , %s\t# spill",
2513 offset,
2514 Matcher::regName[src_first]);
2515 #endif
2516 }
2517 }
2518 return 0;
2519 } else if (dst_first_rc == rc_int) {
2520 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2521 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2522 // 64-bit
2523 if (masm) {
2524 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2525 #ifndef PRODUCT
2526 } else {
2527 st->print("kmovq %s, %s\t# spill",
2528 Matcher::regName[dst_first],
2529 Matcher::regName[src_first]);
2530 #endif
2531 }
2532 }
2533 Unimplemented();
2534 return 0;
2535 } else if (dst_first_rc == rc_kreg) {
2536 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2537 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2538 // 64-bit
2539 if (masm) {
2540 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2541 #ifndef PRODUCT
2542 } else {
2543 st->print("kmovq %s, %s\t# spill",
2544 Matcher::regName[dst_first],
2545 Matcher::regName[src_first]);
2546 #endif
2547 }
2548 }
2549 return 0;
2550 } else if (dst_first_rc == rc_float) {
2551 assert(false, "Illegal spill");
2552 return 0;
2553 }
2554 }
2555
  assert(false, "unhandled register class combination in spill copy");
2557 Unimplemented();
2558 return 0;
2559 }
2560
2561 #ifndef PRODUCT
2562 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2563 implementation(nullptr, ra_, false, st);
2564 }
2565 #endif
2566
2567 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2568 implementation(masm, ra_, false, nullptr);
2569 }
2570
2571 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2572 return MachNode::size(ra_);
2573 }
2574
2575 //=============================================================================
2576 #ifndef PRODUCT
2577 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2578 {
2579 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2580 int reg = ra_->get_reg_first(this);
2581 st->print("leaq %s, [rsp + #%d]\t# box lock",
2582 Matcher::regName[reg], offset);
2583 }
2584 #endif
2585
2586 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2587 {
2588 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2589 int reg = ra_->get_encode(this);
2590
2591 __ lea(as_Register(reg), Address(rsp, offset));
2592 }
2593
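// leaq dst, [rsp + offset] needs a SIB byte for the rsp base:
// prefix(es) + opcode + ModRM + SIB + disp8/disp32.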
2594 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2595 {
2596 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2597 if (ra_->get_encode(this) > 15) {
2598 return (offset < 0x80) ? 6 : 9; // REX2
2599 } else {
2600 return (offset < 0x80) ? 5 : 8; // REX
2601 }
2602 }
2603
2604 //=============================================================================
2605 #ifndef PRODUCT
2606 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2607 {
2608 if (UseCompressedClassPointers) {
2609 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2610 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2611 } else {
2612 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2613 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2614 }
2615 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2616 }
2617 #endif
2618
2619 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2620 {
2621 __ ic_check(InteriorEntryAlignment);
2622 }
2623
2624 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2625 {
2626 return MachNode::size(ra_); // too many variables; just compute it
2627 // the hard way
2628 }
2629
2630
2631 //=============================================================================
2632
2633 bool Matcher::supports_vector_calling_convention(void) {
2634 return EnableVectorSupport;
2635 }
2636
2637 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2638 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2639 }
2640
2641 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2642 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2643 }
2644
2645 #ifdef ASSERT
2646 static bool is_ndd_demotable(const MachNode* mdef) {
2647 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2648 }
2649 #endif
2650
2651 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2652 int oper_index) {
2653 if (mdef == nullptr) {
2654 return false;
2655 }
2656
2657 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2658 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2659 assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
2660 assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
2661 return false;
2662 }
2663
2664 // A complex memory operand covers multiple incoming edges needed for
2665 // address computation. Biasing the def towards any address component will
2666 // not result in NDD demotion by the assembler.
2667 if (mdef->operand_num_edges(oper_index) != 1) {
2668 return false;
2669 }
2670
2671 // Demotion candidate must be register mask compatible with definition.
2672 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2673 if (!oper_mask.overlap(mdef->out_RegMask())) {
2674 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2675 return false;
2676 }
2677
2678 switch (oper_index) {
2679 // First operand of MachNode corresponding to Intel APX NDD selection
2680 // pattern can share its assigned register with definition operand if
2681 // their live ranges do not overlap. In such a scenario we can demote
2682 // it to legacy map0/map1 instruction by replacing its 4-byte extended
2683 // EVEX prefix with shorter REX/REX2 encoding. Demotion candidates
2684 // are decorated with a special flag by instruction selector.
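// Illustrative (assumed) example: an APX NDD instruction such as
// "add r10, r11, r12" (r10 = r11 + r12) carries a 4-byte EVEX prefix; if the
// allocator assigns r10 and r11 the same register, the assembler can demote it
// to the legacy two-operand "add r10, r12" with a shorter REX/REX2 encoding.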
2685 case 1:
2686 return is_ndd_demotable_opr1(mdef);
2687
2688 // Definition operand of commutative operation can be biased towards second
2689 // operand.
2690 case 2:
2691 return is_ndd_demotable_opr2(mdef);
2692
2693 // Current scheme only selects up to two biasing candidates
2694 default:
2695 assert(false, "unhandled operand index: %s", mdef->Name());
2696 break;
2697 }
2698
2699 return false;
2700 }
2701
2702 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2703 assert(EnableVectorSupport, "sanity");
2704 int lo = XMM0_num;
2705 int hi = XMM0b_num;
2706 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2707 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2708 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2709 return OptoRegPair(hi, lo);
2710 }
2711
2712 // Is this branch offset short enough that a short branch can be used?
2713 //
2714 // NOTE: If the platform does not provide any short branch variants, then
2715 // this method should return false for offset 0.
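// For example, a 2-byte short jump whose target is the instruction immediately
// following it is passed offset == br_size, which the adjustment below turns
// into a displacement of 0.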
2716 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2717 // The passed offset is relative to the address of the branch.
2718 // On x86 a branch displacement is calculated relative to the address
2719 // of the next instruction.
2720 offset -= br_size;
2721
2722 // The short version of jmpConUCF2 contains multiple branches,
2723 // making the reach slightly shorter.
2724 if (rule == jmpConUCF2_rule)
2725 return (-126 <= offset && offset <= 125);
2726 return (-128 <= offset && offset <= 127);
2727 }
2728
2729 #ifdef ASSERT
2730 // Return whether or not this register is ever used as an argument.
2731 bool Matcher::can_be_java_arg(int reg)
2732 {
2733 return
2734 reg == RDI_num || reg == RDI_H_num ||
2735 reg == RSI_num || reg == RSI_H_num ||
2736 reg == RDX_num || reg == RDX_H_num ||
2737 reg == RCX_num || reg == RCX_H_num ||
2738 reg == R8_num || reg == R8_H_num ||
2739 reg == R9_num || reg == R9_H_num ||
2740 reg == R12_num || reg == R12_H_num ||
2741 reg == XMM0_num || reg == XMM0b_num ||
2742 reg == XMM1_num || reg == XMM1b_num ||
2743 reg == XMM2_num || reg == XMM2b_num ||
2744 reg == XMM3_num || reg == XMM3b_num ||
2745 reg == XMM4_num || reg == XMM4b_num ||
2746 reg == XMM5_num || reg == XMM5b_num ||
2747 reg == XMM6_num || reg == XMM6b_num ||
2748 reg == XMM7_num || reg == XMM7b_num;
2749 }
2750 #endif
2751
2752 uint Matcher::int_pressure_limit()
2753 {
2754 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2755 }
2756
2757 uint Matcher::float_pressure_limit()
2758 {
2759 // After experimenting with different values, the following default threshold
2760 // works best for LCM's register pressure scheduling on x64.
2761 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2762 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2763 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2764 }
2765
2766 // Register for DIVI projection of divmodI
2767 const RegMask& Matcher::divI_proj_mask() {
2768 return INT_RAX_REG_mask();
2769 }
2770
2771 // Register for MODI projection of divmodI
2772 const RegMask& Matcher::modI_proj_mask() {
2773 return INT_RDX_REG_mask();
2774 }
2775
2776 // Register for DIVL projection of divmodL
2777 const RegMask& Matcher::divL_proj_mask() {
2778 return LONG_RAX_REG_mask();
2779 }
2780
2781 // Register for MODL projection of divmodL
2782 const RegMask& Matcher::modL_proj_mask() {
2783 return LONG_RDX_REG_mask();
2784 }
2785
2786 %}
2787
2788 source_hpp %{
2789 // Header information of the source block.
2790 // Method declarations/definitions which are used outside
2791 // the ad-scope can conveniently be defined here.
2792 //
2793 // To keep related declarations/definitions/uses close together,
2794 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
2795
2796 #include "runtime/vm_version.hpp"
2797
2798 class NativeJump;
2799
2800 class CallStubImpl {
2801
2802 //--------------------------------------------------------------
2803 //---< Used for optimization in Compile::shorten_branches >---
2804 //--------------------------------------------------------------
2805
2806 public:
2807 // Size of call trampoline stub.
2808 static uint size_call_trampoline() {
2809 return 0; // no call trampolines on this platform
2810 }
2811
2812 // number of relocations needed by a call trampoline stub
2813 static uint reloc_call_trampoline() {
2814 return 0; // no call trampolines on this platform
2815 }
2816 };
2817
2818 class HandlerImpl {
2819
2820 public:
2821
2822 static int emit_deopt_handler(C2_MacroAssembler* masm);
2823
2824 static uint size_deopt_handler() {
2825 // one call and one jmp.
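// (a near call, E8 rel32, is 5 bytes and the short backward jmp, EB rel8,
// is 2 bytes; see emit_deopt_handler())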
2826 return 7;
2827 }
2828 };
2829
2830 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2831 switch(bytes) {
2832 case 4: // fall-through
2833 case 8: // fall-through
2834 case 16: return Assembler::AVX_128bit;
2835 case 32: return Assembler::AVX_256bit;
2836 case 64: return Assembler::AVX_512bit;
2837
2838 default: {
2839 ShouldNotReachHere();
2840 return Assembler::AVX_NoVec;
2841 }
2842 }
2843 }
2844
2845 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2846 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2847 }
2848
2849 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2850 uint def_idx = use->operand_index(opnd);
2851 Node* def = use->in(def_idx);
2852 return vector_length_encoding(def);
2853 }
2854
2855 static inline bool is_vector_popcount_predicate(BasicType bt) {
2856 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2857 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2858 }
2859
2860 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2861 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2862 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2863 }
2864
2865 class Node::PD {
2866 public:
2867 enum NodeFlags : uint64_t {
2868 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2869 Flag_sets_carry_flag = Node::_last_flag << 2,
2870 Flag_sets_parity_flag = Node::_last_flag << 3,
2871 Flag_sets_zero_flag = Node::_last_flag << 4,
2872 Flag_sets_overflow_flag = Node::_last_flag << 5,
2873 Flag_sets_sign_flag = Node::_last_flag << 6,
2874 Flag_clears_carry_flag = Node::_last_flag << 7,
2875 Flag_clears_parity_flag = Node::_last_flag << 8,
2876 Flag_clears_zero_flag = Node::_last_flag << 9,
2877 Flag_clears_overflow_flag = Node::_last_flag << 10,
2878 Flag_clears_sign_flag = Node::_last_flag << 11,
2879 Flag_ndd_demotable_opr1 = Node::_last_flag << 12,
2880 Flag_ndd_demotable_opr2 = Node::_last_flag << 13,
2881 _last_flag = Flag_ndd_demotable_opr2
2882 };
2883 };
2884
2885 %} // end source_hpp
2886
2887 source %{
2888
2889 #include "opto/addnode.hpp"
2890 #include "c2_intelJccErratum_x86.hpp"
2891
2892 void PhaseOutput::pd_perform_mach_node_analysis() {
2893 if (VM_Version::has_intel_jcc_erratum()) {
2894 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2895 _buf_sizes._code += extra_padding;
2896 }
2897 }
2898
2899 int MachNode::pd_alignment_required() const {
2900 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2901 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2902 return IntelJccErratum::largest_jcc_size() + 1;
2903 } else {
2904 return 1;
2905 }
2906 }
2907
2908 int MachNode::compute_padding(int current_offset) const {
2909 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2910 Compile* C = Compile::current();
2911 PhaseOutput* output = C->output();
2912 Block* block = output->block();
2913 int index = output->index();
2914 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2915 } else {
2916 return 0;
2917 }
2918 }
2919
2920 // Emit deopt handler code.
2921 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2922
2923 // Note that the code buffer's insts_mark is always relative to insts.
2924 // That's why we must use the macroassembler to generate a handler.
2925 address base = __ start_a_stub(size_deopt_handler());
2926 if (base == nullptr) {
2927 ciEnv::current()->record_failure("CodeCache is full");
2928 return 0; // CodeBuffer::expand failed
2929 }
2930 int offset = __ offset();
2931
2932 Label start;
2933 __ bind(start);
2934
2935 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2936
2937 int entry_offset = __ offset();
2938
2939 __ jmp(start);
2940
2941 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2942 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2943 "out of bounds read in post-call NOP check");
2944 __ end_a_stub();
2945 return entry_offset;
2946 }
2947
2948 static Assembler::Width widthForType(BasicType bt) {
2949 if (bt == T_BYTE) {
2950 return Assembler::B;
2951 } else if (bt == T_SHORT) {
2952 return Assembler::W;
2953 } else if (bt == T_INT) {
2954 return Assembler::D;
2955 } else {
2956 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2957 return Assembler::Q;
2958 }
2959 }
2960
2961 //=============================================================================
2962
2963 // Float masks come from different places depending on platform.
2964 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2965 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2966 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2967 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2968 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2969 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2970 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2971 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2972 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2973 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2974 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2975 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2976 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2977 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2978 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2979 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2980 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2981 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2982 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2983
2984 //=============================================================================
2985 bool Matcher::match_rule_supported(int opcode) {
2986 if (!has_match_rule(opcode)) {
2987 return false; // no match rule present
2988 }
2989 switch (opcode) {
2990 case Op_AbsVL:
2991 case Op_StoreVectorScatter:
2992 if (UseAVX < 3) {
2993 return false;
2994 }
2995 break;
2996 case Op_PopCountI:
2997 case Op_PopCountL:
2998 if (!UsePopCountInstruction) {
2999 return false;
3000 }
3001 break;
3002 case Op_PopCountVI:
3003 if (UseAVX < 2) {
3004 return false;
3005 }
3006 break;
3007 case Op_CompressV:
3008 case Op_ExpandV:
3009 case Op_PopCountVL:
3010 if (UseAVX < 2) {
3011 return false;
3012 }
3013 break;
3014 case Op_MulVI:
3015 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3016 return false;
3017 }
3018 break;
3019 case Op_MulVL:
3020 if (UseSSE < 4) { // only with SSE4_1 or AVX
3021 return false;
3022 }
3023 break;
3024 case Op_MulReductionVL:
3025 if (VM_Version::supports_avx512dq() == false) {
3026 return false;
3027 }
3028 break;
3029 case Op_AbsVB:
3030 case Op_AbsVS:
3031 case Op_AbsVI:
3032 case Op_AddReductionVI:
3033 case Op_AndReductionV:
3034 case Op_OrReductionV:
3035 case Op_XorReductionV:
3036 if (UseSSE < 3) { // requires at least SSSE3
3037 return false;
3038 }
3039 break;
3040 case Op_MaxHF:
3041 case Op_MinHF:
3042 if (!VM_Version::supports_avx512vlbw()) {
3043 return false;
3044 } // fallthrough
3045 case Op_AddHF:
3046 case Op_DivHF:
3047 case Op_FmaHF:
3048 case Op_MulHF:
3049 case Op_ReinterpretS2HF:
3050 case Op_ReinterpretHF2S:
3051 case Op_SubHF:
3052 case Op_SqrtHF:
3053 if (!VM_Version::supports_avx512_fp16()) {
3054 return false;
3055 }
3056 break;
3057 case Op_VectorLoadShuffle:
3058 case Op_VectorRearrange:
3059 case Op_MulReductionVI:
3060 if (UseSSE < 4) { // requires at least SSE4
3061 return false;
3062 }
3063 break;
3064 case Op_IsInfiniteF:
3065 case Op_IsInfiniteD:
3066 if (!VM_Version::supports_avx512dq()) {
3067 return false;
3068 }
3069 break;
3070 case Op_SqrtVD:
3071 case Op_SqrtVF:
3072 case Op_VectorMaskCmp:
3073 case Op_VectorCastB2X:
3074 case Op_VectorCastS2X:
3075 case Op_VectorCastI2X:
3076 case Op_VectorCastL2X:
3077 case Op_VectorCastF2X:
3078 case Op_VectorCastD2X:
3079 case Op_VectorUCastB2X:
3080 case Op_VectorUCastS2X:
3081 case Op_VectorUCastI2X:
3082 case Op_VectorMaskCast:
3083 if (UseAVX < 1) { // enabled for AVX only
3084 return false;
3085 }
3086 break;
3087 case Op_PopulateIndex:
3088 if (UseAVX < 2) {
3089 return false;
3090 }
3091 break;
3092 case Op_RoundVF:
3093 if (UseAVX < 2) { // enabled for AVX2 only
3094 return false;
3095 }
3096 break;
3097 case Op_RoundVD:
3098 if (UseAVX < 3) {
3099 return false; // enabled for AVX3 only
3100 }
3101 break;
3102 case Op_CompareAndSwapL:
3103 case Op_CompareAndSwapP:
3104 break;
3105 case Op_StrIndexOf:
3106 if (!UseSSE42Intrinsics) {
3107 return false;
3108 }
3109 break;
3110 case Op_StrIndexOfChar:
3111 if (!UseSSE42Intrinsics) {
3112 return false;
3113 }
3114 break;
3115 case Op_OnSpinWait:
3116 if (VM_Version::supports_on_spin_wait() == false) {
3117 return false;
3118 }
3119 break;
3120 case Op_MulVB:
3121 case Op_LShiftVB:
3122 case Op_RShiftVB:
3123 case Op_URShiftVB:
3124 case Op_VectorInsert:
3125 case Op_VectorLoadMask:
3126 case Op_VectorStoreMask:
3127 case Op_VectorBlend:
3128 if (UseSSE < 4) {
3129 return false;
3130 }
3131 break;
3132 case Op_MaxD:
3133 case Op_MaxF:
3134 case Op_MinD:
3135 case Op_MinF:
3136 if (UseAVX < 1) { // enabled for AVX only
3137 return false;
3138 }
3139 break;
3140 case Op_CacheWB:
3141 case Op_CacheWBPreSync:
3142 case Op_CacheWBPostSync:
3143 if (!VM_Version::supports_data_cache_line_flush()) {
3144 return false;
3145 }
3146 break;
3147 case Op_ExtractB:
3148 case Op_ExtractL:
3149 case Op_ExtractI:
3150 case Op_RoundDoubleMode:
3151 if (UseSSE < 4) {
3152 return false;
3153 }
3154 break;
3155 case Op_RoundDoubleModeV:
3156 if (VM_Version::supports_avx() == false) {
3157 return false; // 128bit vroundpd is not available
3158 }
3159 break;
3160 case Op_LoadVectorGather:
3161 case Op_LoadVectorGatherMasked:
3162 if (UseAVX < 2) {
3163 return false;
3164 }
3165 break;
3166 case Op_FmaF:
3167 case Op_FmaD:
3168 case Op_FmaVD:
3169 case Op_FmaVF:
3170 if (!UseFMA) {
3171 return false;
3172 }
3173 break;
3174 case Op_MacroLogicV:
3175 if (UseAVX < 3 || !UseVectorMacroLogic) {
3176 return false;
3177 }
3178 break;
3179
3180 case Op_VectorCmpMasked:
3181 case Op_VectorMaskGen:
3182 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3183 return false;
3184 }
3185 break;
3186 case Op_VectorMaskFirstTrue:
3187 case Op_VectorMaskLastTrue:
3188 case Op_VectorMaskTrueCount:
3189 case Op_VectorMaskToLong:
3190 if (UseAVX < 1) {
3191 return false;
3192 }
3193 break;
3194 case Op_RoundF:
3195 case Op_RoundD:
3196 break;
3197 case Op_CopySignD:
3198 case Op_CopySignF:
3199 if (UseAVX < 3) {
3200 return false;
3201 }
3202 if (!VM_Version::supports_avx512vl()) {
3203 return false;
3204 }
3205 break;
3206 case Op_CompressBits:
3207 case Op_ExpandBits:
3208 if (!VM_Version::supports_bmi2()) {
3209 return false;
3210 }
3211 break;
3212 case Op_CompressM:
3213 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3214 return false;
3215 }
3216 break;
3217 case Op_ConvF2HF:
3218 case Op_ConvHF2F:
3219 if (!VM_Version::supports_float16()) {
3220 return false;
3221 }
3222 break;
3223 case Op_VectorCastF2HF:
3224 case Op_VectorCastHF2F:
3225 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3226 return false;
3227 }
3228 break;
3229 }
3230 return true; // Match rules are supported by default.
3231 }
3232
3233 //------------------------------------------------------------------------
3234
3235 static inline bool is_pop_count_instr_target(BasicType bt) {
3236 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3237 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3238 }
3239
3240 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3241 return match_rule_supported_vector(opcode, vlen, bt);
3242 }
3243
3244 // Identify extra cases that we might want to provide match rules for vector nodes and
3245 // other intrinsics guarded with vector length (vlen) and element type (bt).
3246 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3247 if (!match_rule_supported(opcode)) {
3248 return false;
3249 }
3250 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3251 // * SSE2 supports 128bit vectors for all types;
3252 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3253 // * AVX2 supports 256bit vectors for all types;
3254 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3255 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3256 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3257 // And MaxVectorSize is taken into account as well.
3258 if (!vector_size_supported(bt, vlen)) {
3259 return false;
3260 }
3261 // Special cases which require vector length follow:
3262 // * implementation limitations
3263 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3264 // * 128bit vroundpd instruction is present only in AVX1
3265 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3266 switch (opcode) {
3267 case Op_MaxVHF:
3268 case Op_MinVHF:
3269 if (!VM_Version::supports_avx512bw()) {
3270 return false;
3271 } // fallthrough
3272 case Op_AddVHF:
3273 case Op_DivVHF:
3274 case Op_FmaVHF:
3275 case Op_MulVHF:
3276 case Op_SubVHF:
3277 case Op_SqrtVHF:
3278 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3279 return false;
3280 }
3281 if (!VM_Version::supports_avx512_fp16()) {
3282 return false;
3283 }
3284 break;
3285 case Op_AbsVF:
3286 case Op_NegVF:
3287 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
3288 return false; // 512bit vandps and vxorps are not available
3289 }
3290 break;
3291 case Op_AbsVD:
3292 case Op_NegVD:
3293 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
3294 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3295 }
3296 break;
3297 case Op_RotateRightV:
3298 case Op_RotateLeftV:
3299 if (bt != T_INT && bt != T_LONG) {
3300 return false;
3301 } // fallthrough
3302 case Op_MacroLogicV:
3303 if (!VM_Version::supports_evex() ||
3304 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3305 return false;
3306 }
3307 break;
3308 case Op_ClearArray:
3309 case Op_VectorMaskGen:
3310 case Op_VectorCmpMasked:
3311 if (!VM_Version::supports_avx512bw()) {
3312 return false;
3313 }
3314 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3315 return false;
3316 }
3317 break;
3318 case Op_LoadVectorMasked:
3319 case Op_StoreVectorMasked:
3320 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3321 return false;
3322 }
3323 break;
3324 case Op_UMinV:
3325 case Op_UMaxV:
3326 if (UseAVX == 0) {
3327 return false;
3328 }
3329 break;
3330 case Op_UMinReductionV:
3331 case Op_UMaxReductionV:
3332 if (UseAVX == 0) {
3333 return false;
3334 }
3335 if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
3336 return false;
3337 }
3338 if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
3339 return false;
3340 }
3341 break;
3342 case Op_MaxV:
3343 case Op_MinV:
3344 if (UseSSE < 4 && is_integral_type(bt)) {
3345 return false;
3346 }
3347 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3348 // Float/Double intrinsics are enabled for AVX family currently.
3349 if (UseAVX == 0) {
3350 return false;
3351 }
3352 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3353 return false;
3354 }
3355 }
3356 break;
3357 case Op_CallLeafVector:
3358 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3359 return false;
3360 }
3361 break;
3362 case Op_AddReductionVI:
3363 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3364 return false;
3365 }
3366 // fallthrough
3367 case Op_AndReductionV:
3368 case Op_OrReductionV:
3369 case Op_XorReductionV:
3370 if (is_subword_type(bt) && (UseSSE < 4)) {
3371 return false;
3372 }
3373 break;
3374 case Op_MinReductionV:
3375 case Op_MaxReductionV:
3376 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3377 return false;
3378 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3379 return false;
3380 }
3381 // Float/Double intrinsics enabled for AVX family.
3382 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3383 return false;
3384 }
3385 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3386 return false;
3387 }
3388 break;
3389 case Op_VectorBlend:
3390 if (UseAVX == 0 && size_in_bits < 128) {
3391 return false;
3392 }
3393 break;
3394 case Op_VectorTest:
3395 if (UseSSE < 4) {
3396 return false; // Implementation limitation
3397 } else if (size_in_bits < 32) {
3398 return false; // Implementation limitation
3399 }
3400 break;
3401 case Op_VectorLoadShuffle:
3402 case Op_VectorRearrange:
3403 if(vlen == 2) {
3404 return false; // Implementation limitation due to how shuffle is loaded
3405 } else if (size_in_bits == 256 && UseAVX < 2) {
3406 return false; // Implementation limitation
3407 }
3408 break;
3409 case Op_VectorLoadMask:
3410 case Op_VectorMaskCast:
3411 if (size_in_bits == 256 && UseAVX < 2) {
3412 return false; // Implementation limitation
3413 }
3414 // fallthrough
3415 case Op_VectorStoreMask:
3416 if (vlen == 2) {
3417 return false; // Implementation limitation
3418 }
3419 break;
3420 case Op_PopulateIndex:
3421 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3422 return false;
3423 }
3424 break;
3425 case Op_VectorCastB2X:
3426 case Op_VectorCastS2X:
3427 case Op_VectorCastI2X:
3428 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3429 return false;
3430 }
3431 break;
3432 case Op_VectorCastL2X:
3433 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3434 return false;
3435 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3436 return false;
3437 }
3438 break;
3439 case Op_VectorCastF2X: {
3440 // As per JLS section 5.1.3, narrowing conversions to sub-word types
3441 // happen after an intermediate conversion to integer, and the special
3442 // handling code needs the AVX2 vpcmpeqd instruction for 256 bit vectors.
3443 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3444 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3445 return false;
3446 }
3447 }
3448 // fallthrough
3449 case Op_VectorCastD2X:
3450 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3451 return false;
3452 }
3453 break;
3454 case Op_VectorCastF2HF:
3455 case Op_VectorCastHF2F:
3456 if (!VM_Version::supports_f16c() &&
3457 ((!VM_Version::supports_evex() ||
3458 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3459 return false;
3460 }
3461 break;
3462 case Op_RoundVD:
3463 if (!VM_Version::supports_avx512dq()) {
3464 return false;
3465 }
3466 break;
3467 case Op_MulReductionVI:
3468 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3469 return false;
3470 }
3471 break;
3472 case Op_LoadVectorGatherMasked:
3473 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3474 return false;
3475 }
3476 if (is_subword_type(bt) &&
3477 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3478 (size_in_bits < 64) ||
3479 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3480 return false;
3481 }
3482 break;
3483 case Op_StoreVectorScatterMasked:
3484 case Op_StoreVectorScatter:
3485 if (is_subword_type(bt)) {
3486 return false;
3487 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3488 return false;
3489 }
3490 // fallthrough
3491 case Op_LoadVectorGather:
3492 if (!is_subword_type(bt) && size_in_bits == 64) {
3493 return false;
3494 }
3495 if (is_subword_type(bt) && size_in_bits < 64) {
3496 return false;
3497 }
3498 break;
3499 case Op_SaturatingAddV:
3500 case Op_SaturatingSubV:
3501 if (UseAVX < 1) {
3502 return false; // Implementation limitation
3503 }
3504 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3505 return false;
3506 }
3507 break;
3508 case Op_SelectFromTwoVector:
3509 if (size_in_bits < 128) {
3510 return false;
3511 }
3512 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3513 return false;
3514 }
3515 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3516 return false;
3517 }
3518 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3519 return false;
3520 }
3521 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3522 return false;
3523 }
3524 break;
3525 case Op_MaskAll:
3526 if (!VM_Version::supports_evex()) {
3527 return false;
3528 }
3529 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3530 return false;
3531 }
3532 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3533 return false;
3534 }
3535 break;
3536 case Op_VectorMaskCmp:
3537 if (vlen < 2 || size_in_bits < 32) {
3538 return false;
3539 }
3540 break;
3541 case Op_CompressM:
3542 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3543 return false;
3544 }
3545 break;
3546 case Op_CompressV:
3547 case Op_ExpandV:
3548 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3549 return false;
3550 }
3551 if (size_in_bits < 128) {
3552 return false;
3553 } // fallthrough
3554 case Op_VectorLongToMask:
3555 if (UseAVX < 1) {
3556 return false;
3557 }
3558 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3559 return false;
3560 }
3561 break;
3562 case Op_SignumVD:
3563 case Op_SignumVF:
3564 if (UseAVX < 1) {
3565 return false;
3566 }
3567 break;
3568 case Op_PopCountVI:
3569 case Op_PopCountVL: {
3570 if (!is_pop_count_instr_target(bt) &&
3571 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3572 return false;
3573 }
3574 }
3575 break;
3576 case Op_ReverseV:
3577 case Op_ReverseBytesV:
3578 if (UseAVX < 2) {
3579 return false;
3580 }
3581 break;
3582 case Op_CountTrailingZerosV:
3583 case Op_CountLeadingZerosV:
3584 if (UseAVX < 2) {
3585 return false;
3586 }
3587 break;
3588 }
3589 return true; // Match rules are supported by default.
3590 }
3591
3592 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
3593 // The ADLC-based match_rule_supported routine checks for the existence of a pattern based
3594 // on the IR opcode. Most of the unary/binary/ternary masked operations share the IR nodes
3595 // of their non-masked counterparts, with the mask edge being the differentiator.
3596 // This routine does a strict check on the existence of masked operation patterns
3597 // by returning false for all opcodes other than the
3598 // ones whose masked instruction patterns are defined in this file.
3599 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3600 return false;
3601 }
3602
3603 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
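// Masked (EVEX-predicated) forms on 128/256-bit vectors need AVX512VL for the
// shorter EVEX vector lengths; only 512-bit vectors work with plain AVX-512F.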
3604 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3605 return false;
3606 }
3607 switch(opcode) {
3608 // Unary masked operations
3609 case Op_AbsVB:
3610 case Op_AbsVS:
3611 if(!VM_Version::supports_avx512bw()) {
3612 return false; // Implementation limitation
3613 }
3614 case Op_AbsVI:
3615 case Op_AbsVL:
3616 return true;
3617
3618 // Ternary masked operations
3619 case Op_FmaVF:
3620 case Op_FmaVD:
3621 return true;
3622
3623 case Op_MacroLogicV:
3624 if(bt != T_INT && bt != T_LONG) {
3625 return false;
3626 }
3627 return true;
3628
3629 // Binary masked operations
3630 case Op_AddVB:
3631 case Op_AddVS:
3632 case Op_SubVB:
3633 case Op_SubVS:
3634 case Op_MulVS:
3635 case Op_LShiftVS:
3636 case Op_RShiftVS:
3637 case Op_URShiftVS:
3638 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3639 if (!VM_Version::supports_avx512bw()) {
3640 return false; // Implementation limitation
3641 }
3642 return true;
3643
3644 case Op_MulVL:
3645 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3646 if (!VM_Version::supports_avx512dq()) {
3647 return false; // Implementation limitation
3648 }
3649 return true;
3650
3651 case Op_AndV:
3652 case Op_OrV:
3653 case Op_XorV:
3654 case Op_RotateRightV:
3655 case Op_RotateLeftV:
3656 if (bt != T_INT && bt != T_LONG) {
3657 return false; // Implementation limitation
3658 }
3659 return true;
3660
3661 case Op_VectorLoadMask:
3662 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3663 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3664 return false;
3665 }
3666 return true;
3667
3668 case Op_AddVI:
3669 case Op_AddVL:
3670 case Op_AddVF:
3671 case Op_AddVD:
3672 case Op_SubVI:
3673 case Op_SubVL:
3674 case Op_SubVF:
3675 case Op_SubVD:
3676 case Op_MulVI:
3677 case Op_MulVF:
3678 case Op_MulVD:
3679 case Op_DivVF:
3680 case Op_DivVD:
3681 case Op_SqrtVF:
3682 case Op_SqrtVD:
3683 case Op_LShiftVI:
3684 case Op_LShiftVL:
3685 case Op_RShiftVI:
3686 case Op_RShiftVL:
3687 case Op_URShiftVI:
3688 case Op_URShiftVL:
3689 case Op_LoadVectorMasked:
3690 case Op_StoreVectorMasked:
3691 case Op_LoadVectorGatherMasked:
3692 case Op_StoreVectorScatterMasked:
3693 return true;
3694
3695 case Op_UMinV:
3696 case Op_UMaxV:
3697 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3698 return false;
3699 } // fallthrough
3700 case Op_MaxV:
3701 case Op_MinV:
3702 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3703 return false; // Implementation limitation
3704 }
3705 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3706 return false; // Implementation limitation
3707 }
3708 return true;
3709 case Op_SaturatingAddV:
3710 case Op_SaturatingSubV:
3711 if (!is_subword_type(bt)) {
3712 return false;
3713 }
3714 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3715 return false; // Implementation limitation
3716 }
3717 return true;
3718
3719 case Op_VectorMaskCmp:
3720 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3721 return false; // Implementation limitation
3722 }
3723 return true;
3724
3725 case Op_VectorRearrange:
3726 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3727 return false; // Implementation limitation
3728 }
3729 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3730 return false; // Implementation limitation
3731 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3732 return false; // Implementation limitation
3733 }
3734 return true;
3735
3736 // Binary Logical operations
3737 case Op_AndVMask:
3738 case Op_OrVMask:
3739 case Op_XorVMask:
3740 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3741 return false; // Implementation limitation
3742 }
3743 return true;
3744
3745 case Op_PopCountVI:
3746 case Op_PopCountVL:
3747 if (!is_pop_count_instr_target(bt)) {
3748 return false;
3749 }
3750 return true;
3751
3752 case Op_MaskAll:
3753 return true;
3754
3755 case Op_CountLeadingZerosV:
3756 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3757 return true;
3758 }
3759 default:
3760 return false;
3761 }
3762 }
3763
3764 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3765 return false;
3766 }
3767
3768 // Return true if Vector::rearrange needs preparation of the shuffle argument
3769 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3770 switch (elem_bt) {
3771 case T_BYTE: return false;
3772 case T_SHORT: return !VM_Version::supports_avx512bw();
3773 case T_INT: return !VM_Version::supports_avx();
3774 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3775 default:
3776 ShouldNotReachHere();
3777 return false;
3778 }
3779 }
3780
3781 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3782 // Prefer predicate if the mask type is "TypeVectMask".
3783 return vt->isa_vectmask() != nullptr;
3784 }
3785
3786 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3787 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3788 bool legacy = (generic_opnd->opcode() == LEGVEC);
3789 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3790 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3791 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3792 return new legVecZOper();
3793 }
3794 if (legacy) {
3795 switch (ideal_reg) {
3796 case Op_VecS: return new legVecSOper();
3797 case Op_VecD: return new legVecDOper();
3798 case Op_VecX: return new legVecXOper();
3799 case Op_VecY: return new legVecYOper();
3800 case Op_VecZ: return new legVecZOper();
3801 }
3802 } else {
3803 switch (ideal_reg) {
3804 case Op_VecS: return new vecSOper();
3805 case Op_VecD: return new vecDOper();
3806 case Op_VecX: return new vecXOper();
3807 case Op_VecY: return new vecYOper();
3808 case Op_VecZ: return new vecZOper();
3809 }
3810 }
3811 ShouldNotReachHere();
3812 return nullptr;
3813 }
3814
3815 bool Matcher::is_reg2reg_move(MachNode* m) {
3816 switch (m->rule()) {
3817 case MoveVec2Leg_rule:
3818 case MoveLeg2Vec_rule:
3819 case MoveF2VL_rule:
3820 case MoveF2LEG_rule:
3821 case MoveVL2F_rule:
3822 case MoveLEG2F_rule:
3823 case MoveD2VL_rule:
3824 case MoveD2LEG_rule:
3825 case MoveVL2D_rule:
3826 case MoveLEG2D_rule:
3827 return true;
3828 default:
3829 return false;
3830 }
3831 }
3832
3833 bool Matcher::is_generic_vector(MachOper* opnd) {
3834 switch (opnd->opcode()) {
3835 case VEC:
3836 case LEGVEC:
3837 return true;
3838 default:
3839 return false;
3840 }
3841 }
3842
3843 //------------------------------------------------------------------------
3844
3845 const RegMask* Matcher::predicate_reg_mask(void) {
3846 return &_VECTMASK_REG_mask;
3847 }
3848
3849 // Max vector size in bytes. 0 if not supported.
3850 int Matcher::vector_width_in_bytes(BasicType bt) {
3851 assert(is_java_primitive(bt), "only primitive type vectors");
3852 // SSE2 supports 128bit vectors for all types.
3853 // AVX2 supports 256bit vectors for all types.
3854 // EVEX (AVX-512) supports 512bit vectors for all types.
3855 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
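// e.g. UseAVX == 2 -> (1 << 2) * 8 = 32 bytes, UseAVX == 3 -> 64 bytes;
// without AVX (SSE2 only) the default is 16 bytes.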
3856 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3857 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3858 size = (UseAVX > 2) ? 64 : 32;
3859 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3860 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3861 // Use flag to limit vector size.
3862 size = MIN2(size,(int)MaxVectorSize);
3863 // Minimum 2 values in vector (or 4 for bytes).
3864 switch (bt) {
3865 case T_DOUBLE:
3866 case T_LONG:
3867 if (size < 16) return 0;
3868 break;
3869 case T_FLOAT:
3870 case T_INT:
3871 if (size < 8) return 0;
3872 break;
3873 case T_BOOLEAN:
3874 if (size < 4) return 0;
3875 break;
3876 case T_CHAR:
3877 if (size < 4) return 0;
3878 break;
3879 case T_BYTE:
3880 if (size < 4) return 0;
3881 break;
3882 case T_SHORT:
3883 if (size < 4) return 0;
3884 break;
3885 default:
3886 ShouldNotReachHere();
3887 }
3888 return size;
3889 }
3890
3891 // Limits on vector size (number of elements) loaded into vector.
3892 int Matcher::max_vector_size(const BasicType bt) {
3893 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3894 }
3895 int Matcher::min_vector_size(const BasicType bt) {
3896 int max_size = max_vector_size(bt);
3897 // Min size which can be loaded into vector is 4 bytes.
3898 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
3899 // Allow single-element double vectors (Double64) to support SVML calls.
3900 if (bt == T_DOUBLE) {
3901 size = 1;
3902 }
3903 return MIN2(size,max_size);
3904 }
3905
3906 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3907 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3908 // by default on Cascade Lake
3909 if (VM_Version::is_default_intel_cascade_lake()) {
3910 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3911 }
3912 return Matcher::max_vector_size(bt);
3913 }
3914
3915 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3916 return -1;
3917 }
3918
3919 // Vector ideal reg corresponding to specified size in bytes
3920 uint Matcher::vector_ideal_reg(int size) {
3921 assert(MaxVectorSize >= size, "");
3922 switch(size) {
3923 case 4: return Op_VecS;
3924 case 8: return Op_VecD;
3925 case 16: return Op_VecX;
3926 case 32: return Op_VecY;
3927 case 64: return Op_VecZ;
3928 }
3929 ShouldNotReachHere();
3930 return 0;
3931 }
3932
3933 // Check for shift by small constant as well
3934 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3935 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3936 shift->in(2)->get_int() <= 3 &&
3937 // Are there other uses besides address expressions?
3938 !matcher->is_visited(shift)) {
3939 address_visited.set(shift->_idx); // Flag as address_visited
3940 mstack.push(shift->in(2), Matcher::Visit);
3941 Node *conv = shift->in(1);
3942 // Allow Matcher to match the rule which bypass
3943 // ConvI2L operation for an array index on LP64
3944 // if the index value is positive.
3945 if (conv->Opcode() == Op_ConvI2L &&
3946 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3947 // Are there other uses besides address expressions?
3948 !matcher->is_visited(conv)) {
3949 address_visited.set(conv->_idx); // Flag as address_visited
3950 mstack.push(conv->in(1), Matcher::Pre_Visit);
3951 } else {
3952 mstack.push(conv, Matcher::Pre_Visit);
3953 }
3954 return true;
3955 }
3956 return false;
3957 }
3958
3959 // This function identifies sub-graphs in which a 'load' node is
3960 // input to two different nodes, such that the sub-graph can be matched
3961 // with BMI instructions like blsi, blsr, etc.
3962 // Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
3963 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3964 // refers to the same node.
3965 //
3966 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3967 // This is a temporary solution until we make DAGs expressible in ADL.
3968 template<typename ConType>
3969 class FusedPatternMatcher {
3970 Node* _op1_node;
3971 Node* _mop_node;
3972 int _con_op;
3973
3974 static int match_next(Node* n, int next_op, int next_op_idx) {
3975 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3976 return -1;
3977 }
3978
3979 if (next_op_idx == -1) { // n is commutative, try rotations
3980 if (n->in(1)->Opcode() == next_op) {
3981 return 1;
3982 } else if (n->in(2)->Opcode() == next_op) {
3983 return 2;
3984 }
3985 } else {
3986 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3987 if (n->in(next_op_idx)->Opcode() == next_op) {
3988 return next_op_idx;
3989 }
3990 }
3991 return -1;
3992 }
3993
3994 public:
3995 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
3996 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
3997
3998 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
3999 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4000 typename ConType::NativeType con_value) {
4001 if (_op1_node->Opcode() != op1) {
4002 return false;
4003 }
4004 if (_mop_node->outcnt() > 2) {
4005 return false;
4006 }
4007 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4008 if (op1_op2_idx == -1) {
4009 return false;
4010 }
4011 // Memory operation must be the other edge
4012 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4013
4014 // Check that the mop node is really what we want
4015 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4016 Node* op2_node = _op1_node->in(op1_op2_idx);
4017 if (op2_node->outcnt() > 1) {
4018 return false;
4019 }
4020 assert(op2_node->Opcode() == op2, "Should be");
4021 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4022 if (op2_con_idx == -1) {
4023 return false;
4024 }
4025 // Memory operation must be the other edge
4026 int op2_mop_idx = (op2_con_idx & 1) + 1;
4027 // Check that the memory operation is the same node
4028 if (op2_node->in(op2_mop_idx) == _mop_node) {
4029 // Now check the constant
4030 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4031 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4032 return true;
4033 }
4034 }
4035 }
4036 return false;
4037 }
4038 };
4039
4040 static bool is_bmi_pattern(Node* n, Node* m) {
4041 assert(UseBMI1Instructions, "sanity");
4042 if (n != nullptr && m != nullptr) {
4043 if (m->Opcode() == Op_LoadI) {
4044 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4045 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4046 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4047 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4048 } else if (m->Opcode() == Op_LoadL) {
4049 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4050 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4051 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4052 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4053 }
4054 }
4055 return false;
4056 }
4057
4058 // Should the matcher clone input 'm' of node 'n'?
4059 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4060 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4061 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4062 mstack.push(m, Visit);
4063 return true;
4064 }
4065 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4066 mstack.push(m, Visit); // m = ShiftCntV
4067 return true;
4068 }
4069 if (is_encode_and_store_pattern(n, m)) {
4070 mstack.push(m, Visit);
4071 return true;
4072 }
4073 return false;
4074 }
4075
4076 // Should the Matcher clone shifts on addressing modes, expecting them
4077 // to be subsumed into complex addressing expressions or compute them
4078 // into registers?
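// For example, an offset of the form (LShiftL idx #3) can be subsumed into a
// single [base + idx*8 + disp] addressing mode rather than being computed into
// a separate register first.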
4079 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4080 Node *off = m->in(AddPNode::Offset);
4081 if (off->is_Con()) {
4082 address_visited.test_set(m->_idx); // Flag as address_visited
4083 Node *adr = m->in(AddPNode::Address);
4084
4085 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4086 // AtomicAdd is not an addressing expression.
4087 // Cheap to find it by looking for screwy base.
4088 if (adr->is_AddP() &&
4089 !adr->in(AddPNode::Base)->is_top() &&
4090 !adr->in(AddPNode::Offset)->is_Con() &&
4091 off->get_long() == (int) (off->get_long()) && // immL32
4092 // Are there other uses besides address expressions?
4093 !is_visited(adr)) {
4094 address_visited.set(adr->_idx); // Flag as address_visited
4095 Node *shift = adr->in(AddPNode::Offset);
4096 if (!clone_shift(shift, this, mstack, address_visited)) {
4097 mstack.push(shift, Pre_Visit);
4098 }
4099 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4100 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4101 } else {
4102 mstack.push(adr, Pre_Visit);
4103 }
4104
4105 // Clone X+offset as it also folds into most addressing expressions
4106 mstack.push(off, Visit);
4107 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4108 return true;
4109 } else if (clone_shift(off, this, mstack, address_visited)) {
4110 address_visited.test_set(m->_idx); // Flag as address_visited
4111 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4112 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4113 return true;
4114 }
4115 return false;
4116 }
4117
4118 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4119 switch (bt) {
4120 case BoolTest::eq:
4121 return Assembler::eq;
4122 case BoolTest::ne:
4123 return Assembler::neq;
4124 case BoolTest::le:
4125 case BoolTest::ule:
4126 return Assembler::le;
4127 case BoolTest::ge:
4128 case BoolTest::uge:
4129 return Assembler::nlt;
4130 case BoolTest::lt:
4131 case BoolTest::ult:
4132 return Assembler::lt;
4133 case BoolTest::gt:
4134 case BoolTest::ugt:
4135 return Assembler::nle;
4136 default : ShouldNotReachHere(); return Assembler::_false;
4137 }
4138 }
4139
4140 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4141 switch (bt) {
4142 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4143 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4144 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4145 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4146 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4147 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4148 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4149 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4150 }
4151 }
4152
4153 // Helper methods for MachSpillCopyNode::implementation().
4154 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4155 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4156 assert(ireg == Op_VecS || // 32bit vector
4157 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4158 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4159 "no non-adjacent vector moves" );
4160 if (masm) {
4161 switch (ireg) {
4162 case Op_VecS: // copy whole register
4163 case Op_VecD:
4164 case Op_VecX:
4165 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4166 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4167 } else {
4168 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4169 }
4170 break;
4171 case Op_VecY:
4172 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4173 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4174 } else {
4175 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4176 }
4177 break;
4178 case Op_VecZ:
4179 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4180 break;
4181 default:
4182 ShouldNotReachHere();
4183 }
4184 #ifndef PRODUCT
4185 } else {
4186 switch (ireg) {
4187 case Op_VecS:
4188 case Op_VecD:
4189 case Op_VecX:
4190 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4191 break;
4192 case Op_VecY:
4193 case Op_VecZ:
4194 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4195 break;
4196 default:
4197 ShouldNotReachHere();
4198 }
4199 #endif
4200 }
4201 }
4202
4203 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4204 int stack_offset, int reg, uint ireg, outputStream* st) {
4205 if (masm) {
4206 if (is_load) {
4207 switch (ireg) {
4208 case Op_VecS:
4209 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4210 break;
4211 case Op_VecD:
4212 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4213 break;
4214 case Op_VecX:
4215 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4216 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4217 } else {
4218 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4219 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4220 }
4221 break;
4222 case Op_VecY:
4223 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4224 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4225 } else {
4226 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4227 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4228 }
4229 break;
4230 case Op_VecZ:
4231 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4232 break;
4233 default:
4234 ShouldNotReachHere();
4235 }
4236 } else { // store
4237 switch (ireg) {
4238 case Op_VecS:
4239 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4240 break;
4241 case Op_VecD:
4242 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4243 break;
4244 case Op_VecX:
4245 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4246 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4247 }
4248 else {
4249 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4250 }
4251 break;
4252 case Op_VecY:
4253 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4254 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4255 }
4256 else {
4257 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4258 }
4259 break;
4260 case Op_VecZ:
4261 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4262 break;
4263 default:
4264 ShouldNotReachHere();
4265 }
4266 }
4267 #ifndef PRODUCT
4268 } else {
4269 if (is_load) {
4270 switch (ireg) {
4271 case Op_VecS:
4272 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4273 break;
4274 case Op_VecD:
4275 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4276 break;
4277 case Op_VecX:
4278 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4279 break;
4280 case Op_VecY:
4281 case Op_VecZ:
4282 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4283 break;
4284 default:
4285 ShouldNotReachHere();
4286 }
4287 } else { // store
4288 switch (ireg) {
4289 case Op_VecS:
4290 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4291 break;
4292 case Op_VecD:
4293 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4294 break;
4295 case Op_VecX:
4296 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4297 break;
4298 case Op_VecY:
4299 case Op_VecZ:
4300 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4301 break;
4302 default:
4303 ShouldNotReachHere();
4304 }
4305 }
4306 #endif
4307 }
4308 }
4309
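// Builds the byte image of a vector constant with every lane set to 'con'.
// For example (little-endian): vreplicate_imm(T_SHORT, (jshort)0x1234, 4)
// yields the bytes 34 12 34 12 34 12 34 12.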
4310 template <class T>
4311 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4312 int size = type2aelembytes(bt) * len;
4313 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4314 for (int i = 0; i < len; i++) {
4315 int offset = i * type2aelembytes(bt);
4316 switch (bt) {
4317 case T_BYTE: val->at(i) = con; break;
4318 case T_SHORT: {
4319 jshort c = con;
4320 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4321 break;
4322 }
4323 case T_INT: {
4324 jint c = con;
4325 memcpy(val->adr_at(offset), &c, sizeof(jint));
4326 break;
4327 }
4328 case T_LONG: {
4329 jlong c = con;
4330 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4331 break;
4332 }
4333 case T_FLOAT: {
4334 jfloat c = con;
4335 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4336 break;
4337 }
4338 case T_DOUBLE: {
4339 jdouble c = con;
4340 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4341 break;
4342 }
4343 default: assert(false, "%s", type2name(bt));
4344 }
4345 }
4346 return val;
4347 }
4348
4349 static inline jlong high_bit_set(BasicType bt) {
4350 switch (bt) {
4351 case T_BYTE: return 0x8080808080808080;
4352 case T_SHORT: return 0x8000800080008000;
4353 case T_INT: return 0x8000000080000000;
4354 case T_LONG: return 0x8000000000000000;
4355 default:
4356 ShouldNotReachHere();
4357 return 0;
4358 }
4359 }
4360
4361 #ifndef PRODUCT
4362 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4363 st->print("nop \t# %d bytes pad for loops and calls", _count);
4364 }
4365 #endif
4366
4367 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4368 __ nop(_count);
4369 }
4370
4371 uint MachNopNode::size(PhaseRegAlloc*) const {
4372 return _count;
4373 }
4374
4375 #ifndef PRODUCT
4376 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4377 st->print("# breakpoint");
4378 }
4379 #endif
4380
4381 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4382 __ int3();
4383 }
4384
4385 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4386 return MachNode::size(ra_);
4387 }
4388
4389 %}
4390
4391 //----------ENCODING BLOCK-----------------------------------------------------
4392 // This block specifies the encoding classes used by the compiler to
4393 // output byte streams. Encoding classes are parameterized macros
4394 // used by Machine Instruction Nodes in order to generate the bit
4395 // encoding of the instruction. Operands specify their base encoding
4396 // interface with the interface keyword. There are currently four
4397 // supported interfaces: REG_INTER, CONST_INTER, MEMORY_INTER, and
4398 // COND_INTER. REG_INTER causes an operand to generate a function
4399 // which returns its register number when queried. CONST_INTER causes
4400 // an operand to generate a function which returns the value of the
4401 // constant when queried. MEMORY_INTER causes an operand to generate
4402 // four functions which return the Base Register, the Index Register,
4403 // the Scale Value, and the Offset Value of the operand when queried.
4404 // COND_INTER causes an operand to generate six functions which return
4405 // the encoding code (i.e. the encoding bits for the instruction)
4406 // associated with each basic boolean condition for a conditional
4407 // instruction.
4408 //
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding classes (which must be
// a sequence of enc_class names, and their parameters, specified in
// the encoding block), and they use the opcode keyword to specify, in
// order, their primary, secondary, and tertiary opcode.  Only the
// opcode sections which a particular instruction needs for encoding
// need to be specified.  A function is also available to check
// whether a constant displacement is an oop.
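//
// Illustrative sketch only (hypothetical rule, not one of this file's
// definitions; the names exampleAddI and example_add_enc are made up): an
// instruction ties a match rule to an encoding class roughly as follows.
//
//   enc_class example_add_enc(rRegI dst, rRegI src) %{
//     __ addl($dst$$Register, $src$$Register);
//   %}
//
//   instruct exampleAddI(rRegI dst, rRegI src, rFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     format %{ "addl    $dst, $src" %}
//     ins_encode(example_add_enc(dst, src));
//     ins_pipe(ialu_reg_reg);
//   %}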
4417 encode %{
4418 enc_class cdql_enc(no_rax_rdx_RegI div)
4419 %{
4420 // Full implementation of Java idiv and irem; checks for
4421 // special case as described in JVM spec., p.243 & p.271.
4422 //
4423 // normal case special case
4424 //
4425 // input : rax: dividend min_int
4426 // reg: divisor -1
4427 //
4428 // output: rax: quotient (= rax idiv reg) min_int
4429 // rdx: remainder (= rax irem reg) 0
4430 //
    // Code sequence:
4432 //
4433 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4434 // 5: 75 07/08 jne e <normal>
4435 // 7: 33 d2 xor %edx,%edx
4436 // [div >= 8 -> offset + 1]
4437 // [REX_B]
4438 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4439 // c: 74 03/04 je 11 <done>
4440 // 000000000000000e <normal>:
4441 // e: 99 cltd
4442 // [div >= 8 -> offset + 1]
4443 // [REX_B]
4444 // f: f7 f9 idiv $div
4445 // 0000000000000011 <done>:
4446 Label normal;
4447 Label done;
4448
4449 // cmp $0x80000000,%eax
4450 __ cmpl(as_Register(RAX_enc), 0x80000000);
4451
4452 // jne e <normal>
4453 __ jccb(Assembler::notEqual, normal);
4454
4455 // xor %edx,%edx
4456 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4457
    // cmp $0xffffffffffffffff,$div
4459 __ cmpl($div$$Register, -1);
4460
4461 // je 11 <done>
4462 __ jccb(Assembler::equal, done);
4463
4464 // <normal>
4465 // cltd
4466 __ bind(normal);
4467 __ cdql();
4468
4469 // idivl
4470 // <done>
4471 __ idivl($div$$Register);
4472 __ bind(done);
4473 %}
4474
4475 enc_class cdqq_enc(no_rax_rdx_RegL div)
4476 %{
4477 // Full implementation of Java ldiv and lrem; checks for
4478 // special case as described in JVM spec., p.243 & p.271.
4479 //
4480 // normal case special case
4481 //
4482 // input : rax: dividend min_long
4483 // reg: divisor -1
4484 //
4485 // output: rax: quotient (= rax idiv reg) min_long
4486 // rdx: remainder (= rax irem reg) 0
4487 //
    // Code sequence:
4489 //
4490 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4491 // 7: 00 00 80
4492 // a: 48 39 d0 cmp %rdx,%rax
4493 // d: 75 08 jne 17 <normal>
4494 // f: 33 d2 xor %edx,%edx
4495 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4496 // 15: 74 05 je 1c <done>
4497 // 0000000000000017 <normal>:
4498 // 17: 48 99 cqto
4499 // 19: 48 f7 f9 idiv $div
4500 // 000000000000001c <done>:
4501 Label normal;
4502 Label done;
4503
4504 // mov $0x8000000000000000,%rdx
4505 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4506
4507 // cmp %rdx,%rax
4508 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4509
4510 // jne 17 <normal>
4511 __ jccb(Assembler::notEqual, normal);
4512
4513 // xor %edx,%edx
4514 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4515
4516 // cmp $0xffffffffffffffff,$div
4517 __ cmpq($div$$Register, -1);
4518
    // je 1c <done>
4520 __ jccb(Assembler::equal, done);
4521
4522 // <normal>
4523 // cqto
4524 __ bind(normal);
4525 __ cdqq();
4526
    // idivq
4528 // <done>
4529 __ idivq($div$$Register);
4530 __ bind(done);
4531 %}
4532
4533 enc_class clear_avx %{
4534 DEBUG_ONLY(int off0 = __ offset());
4535 if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when the current compiled code uses
      // wide vectors, to avoid an AVX <-> SSE transition penalty during the call.
4539 __ vzeroupper();
4540 }
4541 DEBUG_ONLY(int off1 = __ offset());
4542 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4543 %}
4544
4545 enc_class Java_To_Runtime(method meth) %{
4546 __ lea(r10, RuntimeAddress((address)$meth$$method));
4547 __ call(r10);
4548 __ post_call_nop();
4549 %}
4550
4551 enc_class Java_Static_Call(method meth)
4552 %{
4553 // JAVA STATIC CALL
4554 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4555 // determine who we intended to call.
4556 if (!_method) {
4557 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4558 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4559 // The NOP here is purely to ensure that eliding a call to
4560 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4561 __ addr_nop_5();
4562 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4563 } else {
4564 int method_index = resolved_method_index(masm);
4565 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4566 : static_call_Relocation::spec(method_index);
4567 address mark = __ pc();
4568 int call_offset = __ offset();
4569 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4570 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4571 // Calls of the same statically bound method can share
4572 // a stub to the interpreter.
4573 __ code()->shared_stub_to_interp_for(_method, call_offset);
4574 } else {
4575 // Emit stubs for static call.
4576 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4577 __ clear_inst_mark();
4578 if (stub == nullptr) {
4579 ciEnv::current()->record_failure("CodeCache is full");
4580 return;
4581 }
4582 }
4583 }
4584 __ post_call_nop();
4585 %}
4586
4587 enc_class Java_Dynamic_Call(method meth) %{
4588 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4589 __ post_call_nop();
4590 %}
4591
4592 enc_class call_epilog %{
4593 if (VerifyStackAtCalls) {
4594 // Check that stack depth is unchanged: find majik cookie on stack
4595 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4596 Label L;
4597 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4598 __ jccb(Assembler::equal, L);
4599 // Die if stack mismatch
4600 __ int3();
4601 __ bind(L);
4602 }
4603 %}
4604
4605 %}
4606
4607 //----------FRAME--------------------------------------------------------------
4608 // Definition of frame structure and management information.
4609 //
4610 // S T A C K L A Y O U T Allocators stack-slot number
4611 // | (to get allocators register number
4612 // G Owned by | | v add OptoReg::stack0())
4613 // r CALLER | |
4614 // o | +--------+ pad to even-align allocators stack-slot
4615 // w V | pad0 | numbers; owned by CALLER
4616 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4617 // h ^ | in | 5
4618 // | | args | 4 Holes in incoming args owned by SELF
4619 // | | | | 3
4620 // | | +--------+
4621 // V | | old out| Empty on Intel, window on Sparc
4622 // | old |preserve| Must be even aligned.
4623 // | SP-+--------+----> Matcher::_old_SP, even aligned
4624 // | | in | 3 area for Intel ret address
4625 // Owned by |preserve| Empty on Sparc.
4626 // SELF +--------+
4627 // | | pad2 | 2 pad to align old SP
4628 // | +--------+ 1
4629 // | | locks | 0
4630 // | +--------+----> OptoReg::stack0(), even aligned
4631 // | | pad1 | 11 pad to align new SP
4632 // | +--------+
4633 // | | | 10
4634 // | | spills | 9 spills
4635 // V | | 8 (pad0 slot for callee)
4636 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4637 // ^ | out | 7
4638 // | | args | 6 Holes in outgoing args owned by CALLEE
4639 // Owned by +--------+
4640 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4641 // | new |preserve| Must be even-aligned.
4642 // | SP-+--------+----> Matcher::_new_SP, even aligned
4643 // | | |
4644 //
4645 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4646 // known from SELF's arguments and the Java calling convention.
4647 // Region 6-7 is determined per call site.
4648 // Note 2: If the calling convention leaves holes in the incoming argument
4649 // area, those holes are owned by SELF. Holes in the outgoing area
4650 // are owned by the CALLEE. Holes should not be necessary in the
4651 // incoming area, as the Java calling convention is completely under
4652 // the control of the AD file. Doubles can be sorted and packed to
4653 // avoid holes. Holes in the outgoing arguments may be necessary for
4654 // varargs C calling conventions.
4655 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4656 // even aligned with pad0 as needed.
4657 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4658 // region 6-11 is even aligned; it may be padded out more so that
4659 // the region from SP to FP meets the minimum stack alignment.
4660 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4661 // alignment. Region 11, pad1, may be dynamically extended so that
4662 // SP meets the minimum alignment.
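//
// Worked example (illustrative only, not part of the frame definition below):
// a value spilled to allocator stack slot s corresponds to the abstract
// register OptoReg::stack0() + s; spill and debug-info code maps such a slot
// back to an RSP-relative byte offset, along the lines of:
//
//   OptoReg::Name slot = OptoReg::add(OptoReg::stack0(), 9); // stack slot 9
//   int offset = ra_->reg2offset(slot);  // byte offset from RSP
//   __ movl(Address(rsp, offset), rax);  // access the spill slot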
4663
4664 frame
4665 %{
  // This register defines part of the calling convention
  // between compiled code and the interpreter.
4668 inline_cache_reg(RAX); // Inline Cache Register
4669
4670 // Optional: name the operand used by cisc-spilling to access
4671 // [stack_pointer + offset]
4672 cisc_spilling_operand_name(indOffset32);
4673
4674 // Number of stack slots consumed by locking an object
4675 sync_stack_slots(2);
4676
4677 // Compiled code's Frame Pointer
4678 frame_pointer(RSP);
4679
4680 // Stack alignment requirement
4681 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4682
4683 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4684 // for calls to C. Supports the var-args backing area for register parms.
4685 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4686
4687 // The after-PROLOG location of the return address. Location of
4688 // return address specifies a type (REG or STACK) and a number
4689 // representing the register number (i.e. - use a register name) or
4690 // stack slot.
4691 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4692 // Otherwise, it is above the locks and verification slot and alignment word
4693 return_addr(STACK - 2 +
4694 align_up((Compile::current()->in_preserve_stack_slots() +
4695 Compile::current()->fixed_slots()),
4696 stack_alignment_in_slots()));
4697
4698 // Location of compiled Java return values. Same as C for now.
4699 return_value
4700 %{
4701 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4702 "only return normal values");
4703
4704 static const int lo[Op_RegL + 1] = {
4705 0,
4706 0,
4707 RAX_num, // Op_RegN
4708 RAX_num, // Op_RegI
4709 RAX_num, // Op_RegP
4710 XMM0_num, // Op_RegF
4711 XMM0_num, // Op_RegD
4712 RAX_num // Op_RegL
4713 };
4714 static const int hi[Op_RegL + 1] = {
4715 0,
4716 0,
4717 OptoReg::Bad, // Op_RegN
4718 OptoReg::Bad, // Op_RegI
4719 RAX_H_num, // Op_RegP
4720 OptoReg::Bad, // Op_RegF
4721 XMM0b_num, // Op_RegD
4722 RAX_H_num // Op_RegL
4723 };
    // Flags and vector registers are excluded.
4725 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4726 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4727 %}
4728 %}
4729
4730 //----------ATTRIBUTES---------------------------------------------------------
4731 //----------Operand Attributes-------------------------------------------------
4732 op_attrib op_cost(0); // Required cost attribute
4733
4734 //----------Instruction Attributes---------------------------------------------
4735 ins_attrib ins_cost(100); // Required cost attribute
4736 ins_attrib ins_size(8); // Required size attribute (in bits)
4737 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4738 // a non-matching short branch variant
4739 // of some long branch?
4740 ins_attrib ins_alignment(1); // Required alignment attribute (must
4741 // be a power of 2) specifies the
4742 // alignment that some part of the
4743 // instruction (not necessarily the
4744 // start) requires. If > 1, a
4745 // compute_padding() function must be
4746 // provided for the instruction
4747
4748 // Whether this node is expanded during code emission into a sequence of
4749 // instructions and the first instruction can perform an implicit null check.
4750 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4751
4752 //----------OPERANDS-----------------------------------------------------------
4753 // Operand definitions must precede instruction definitions for correct parsing
4754 // in the ADLC because operands constitute user defined types which are used in
4755 // instruction definitions.
4756
4757 //----------Simple Operands----------------------------------------------------
4758 // Immediate Operands
4759 // Integer Immediate
4760 operand immI()
4761 %{
4762 match(ConI);
4763
4764 op_cost(10);
4765 format %{ %}
4766 interface(CONST_INTER);
4767 %}
4768
4769 // Constant for test vs zero
4770 operand immI_0()
4771 %{
4772 predicate(n->get_int() == 0);
4773 match(ConI);
4774
4775 op_cost(0);
4776 format %{ %}
4777 interface(CONST_INTER);
4778 %}
4779
4780 // Constant for increment
4781 operand immI_1()
4782 %{
4783 predicate(n->get_int() == 1);
4784 match(ConI);
4785
4786 op_cost(0);
4787 format %{ %}
4788 interface(CONST_INTER);
4789 %}
4790
4791 // Constant for decrement
4792 operand immI_M1()
4793 %{
4794 predicate(n->get_int() == -1);
4795 match(ConI);
4796
4797 op_cost(0);
4798 format %{ %}
4799 interface(CONST_INTER);
4800 %}
4801
4802 operand immI_2()
4803 %{
4804 predicate(n->get_int() == 2);
4805 match(ConI);
4806
4807 op_cost(0);
4808 format %{ %}
4809 interface(CONST_INTER);
4810 %}
4811
4812 operand immI_4()
4813 %{
4814 predicate(n->get_int() == 4);
4815 match(ConI);
4816
4817 op_cost(0);
4818 format %{ %}
4819 interface(CONST_INTER);
4820 %}
4821
4822 operand immI_8()
4823 %{
4824 predicate(n->get_int() == 8);
4825 match(ConI);
4826
4827 op_cost(0);
4828 format %{ %}
4829 interface(CONST_INTER);
4830 %}
4831
4832 // Valid scale values for addressing modes
4833 operand immI2()
4834 %{
4835 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4836 match(ConI);
4837
4838 format %{ %}
4839 interface(CONST_INTER);
4840 %}
4841
4842 operand immU7()
4843 %{
4844 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4845 match(ConI);
4846
4847 op_cost(5);
4848 format %{ %}
4849 interface(CONST_INTER);
4850 %}
4851
4852 operand immI8()
4853 %{
4854 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4855 match(ConI);
4856
4857 op_cost(5);
4858 format %{ %}
4859 interface(CONST_INTER);
4860 %}
4861
4862 operand immU8()
4863 %{
4864 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4865 match(ConI);
4866
4867 op_cost(5);
4868 format %{ %}
4869 interface(CONST_INTER);
4870 %}
4871
4872 operand immI16()
4873 %{
4874 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4875 match(ConI);
4876
4877 op_cost(10);
4878 format %{ %}
4879 interface(CONST_INTER);
4880 %}
4881
4882 // Int Immediate non-negative
4883 operand immU31()
4884 %{
4885 predicate(n->get_int() >= 0);
4886 match(ConI);
4887
4888 op_cost(0);
4889 format %{ %}
4890 interface(CONST_INTER);
4891 %}
4892
4893 // Pointer Immediate
4894 operand immP()
4895 %{
4896 match(ConP);
4897
4898 op_cost(10);
4899 format %{ %}
4900 interface(CONST_INTER);
4901 %}
4902
4903 // Null Pointer Immediate
4904 operand immP0()
4905 %{
4906 predicate(n->get_ptr() == 0);
4907 match(ConP);
4908
4909 op_cost(5);
4910 format %{ %}
4911 interface(CONST_INTER);
4912 %}
4913
4914 // Pointer Immediate
4915 operand immN() %{
4916 match(ConN);
4917
4918 op_cost(10);
4919 format %{ %}
4920 interface(CONST_INTER);
4921 %}
4922
4923 operand immNKlass() %{
4924 match(ConNKlass);
4925
4926 op_cost(10);
4927 format %{ %}
4928 interface(CONST_INTER);
4929 %}
4930
4931 // Null Pointer Immediate
4932 operand immN0() %{
4933 predicate(n->get_narrowcon() == 0);
4934 match(ConN);
4935
4936 op_cost(5);
4937 format %{ %}
4938 interface(CONST_INTER);
4939 %}
4940
4941 operand immP31()
4942 %{
4943 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4944 && (n->get_ptr() >> 31) == 0);
4945 match(ConP);
4946
4947 op_cost(5);
4948 format %{ %}
4949 interface(CONST_INTER);
4950 %}
4951
4952
4953 // Long Immediate
4954 operand immL()
4955 %{
4956 match(ConL);
4957
4958 op_cost(20);
4959 format %{ %}
4960 interface(CONST_INTER);
4961 %}
4962
4963 // Long Immediate 8-bit
4964 operand immL8()
4965 %{
4966 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4967 match(ConL);
4968
4969 op_cost(5);
4970 format %{ %}
4971 interface(CONST_INTER);
4972 %}
4973
4974 // Long Immediate 32-bit unsigned
4975 operand immUL32()
4976 %{
4977 predicate(n->get_long() == (unsigned int) (n->get_long()));
4978 match(ConL);
4979
4980 op_cost(10);
4981 format %{ %}
4982 interface(CONST_INTER);
4983 %}
4984
4985 // Long Immediate 32-bit signed
4986 operand immL32()
4987 %{
4988 predicate(n->get_long() == (int) (n->get_long()));
4989 match(ConL);
4990
4991 op_cost(15);
4992 format %{ %}
4993 interface(CONST_INTER);
4994 %}
4995
4996 operand immL_Pow2()
4997 %{
4998 predicate(is_power_of_2((julong)n->get_long()));
4999 match(ConL);
5000
5001 op_cost(15);
5002 format %{ %}
5003 interface(CONST_INTER);
5004 %}
5005
5006 operand immL_NotPow2()
5007 %{
5008 predicate(is_power_of_2((julong)~n->get_long()));
5009 match(ConL);
5010
5011 op_cost(15);
5012 format %{ %}
5013 interface(CONST_INTER);
5014 %}
5015
5016 // Long Immediate zero
5017 operand immL0()
5018 %{
5019 predicate(n->get_long() == 0L);
5020 match(ConL);
5021
5022 op_cost(10);
5023 format %{ %}
5024 interface(CONST_INTER);
5025 %}
5026
5027 // Constant for increment
5028 operand immL1()
5029 %{
5030 predicate(n->get_long() == 1);
5031 match(ConL);
5032
5033 format %{ %}
5034 interface(CONST_INTER);
5035 %}
5036
5037 // Constant for decrement
5038 operand immL_M1()
5039 %{
5040 predicate(n->get_long() == -1);
5041 match(ConL);
5042
5043 format %{ %}
5044 interface(CONST_INTER);
5045 %}
5046
5047 // Long Immediate: low 32-bit mask
5048 operand immL_32bits()
5049 %{
5050 predicate(n->get_long() == 0xFFFFFFFFL);
5051 match(ConL);
5052 op_cost(20);
5053
5054 format %{ %}
5055 interface(CONST_INTER);
5056 %}
5057
5058 // Int Immediate: 2^n-1, positive
5059 operand immI_Pow2M1()
5060 %{
5061 predicate((n->get_int() > 0)
5062 && is_power_of_2((juint)n->get_int() + 1));
5063 match(ConI);
5064
5065 op_cost(20);
5066 format %{ %}
5067 interface(CONST_INTER);
5068 %}
5069
5070 // Float Immediate zero
5071 operand immF0()
5072 %{
5073 predicate(jint_cast(n->getf()) == 0);
5074 match(ConF);
5075
5076 op_cost(5);
5077 format %{ %}
5078 interface(CONST_INTER);
5079 %}
5080
5081 // Float Immediate
5082 operand immF()
5083 %{
5084 match(ConF);
5085
5086 op_cost(15);
5087 format %{ %}
5088 interface(CONST_INTER);
5089 %}
5090
5091 // Half Float Immediate
5092 operand immH()
5093 %{
5094 match(ConH);
5095
5096 op_cost(15);
5097 format %{ %}
5098 interface(CONST_INTER);
5099 %}
5100
5101 // Double Immediate zero
5102 operand immD0()
5103 %{
5104 predicate(jlong_cast(n->getd()) == 0);
5105 match(ConD);
5106
5107 op_cost(5);
5108 format %{ %}
5109 interface(CONST_INTER);
5110 %}
5111
5112 // Double Immediate
5113 operand immD()
5114 %{
5115 match(ConD);
5116
5117 op_cost(15);
5118 format %{ %}
5119 interface(CONST_INTER);
5120 %}
5121
5122 // Immediates for special shifts (sign extend)
5123
// Shift amounts for sign extension
5125 operand immI_16()
5126 %{
5127 predicate(n->get_int() == 16);
5128 match(ConI);
5129
5130 format %{ %}
5131 interface(CONST_INTER);
5132 %}
5133
5134 operand immI_24()
5135 %{
5136 predicate(n->get_int() == 24);
5137 match(ConI);
5138
5139 format %{ %}
5140 interface(CONST_INTER);
5141 %}
5142
5143 // Constant for byte-wide masking
5144 operand immI_255()
5145 %{
5146 predicate(n->get_int() == 255);
5147 match(ConI);
5148
5149 format %{ %}
5150 interface(CONST_INTER);
5151 %}
5152
5153 // Constant for short-wide masking
5154 operand immI_65535()
5155 %{
5156 predicate(n->get_int() == 65535);
5157 match(ConI);
5158
5159 format %{ %}
5160 interface(CONST_INTER);
5161 %}
5162
5163 // Constant for byte-wide masking
5164 operand immL_255()
5165 %{
5166 predicate(n->get_long() == 255);
5167 match(ConL);
5168
5169 format %{ %}
5170 interface(CONST_INTER);
5171 %}
5172
5173 // Constant for short-wide masking
5174 operand immL_65535()
5175 %{
5176 predicate(n->get_long() == 65535);
5177 match(ConL);
5178
5179 format %{ %}
5180 interface(CONST_INTER);
5181 %}
5182
5183 // AOT Runtime Constants Address
5184 operand immAOTRuntimeConstantsAddress()
5185 %{
5186 // Check if the address is in the range of AOT Runtime Constants
5187 predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
5188 match(ConP);
5189
5190 op_cost(0);
5191 format %{ %}
5192 interface(CONST_INTER);
5193 %}
5194
5195 operand kReg()
5196 %{
5197 constraint(ALLOC_IN_RC(vectmask_reg));
5198 match(RegVectMask);
5199 format %{%}
5200 interface(REG_INTER);
5201 %}
5202
5203 // Register Operands
5204 // Integer Register
5205 operand rRegI()
5206 %{
5207 constraint(ALLOC_IN_RC(int_reg));
5208 match(RegI);
5209
5210 match(rax_RegI);
5211 match(rbx_RegI);
5212 match(rcx_RegI);
5213 match(rdx_RegI);
5214 match(rdi_RegI);
5215
5216 format %{ %}
5217 interface(REG_INTER);
5218 %}
5219
5220 // Special Registers
5221 operand rax_RegI()
5222 %{
5223 constraint(ALLOC_IN_RC(int_rax_reg));
5224 match(RegI);
5225 match(rRegI);
5226
5227 format %{ "RAX" %}
5228 interface(REG_INTER);
5229 %}
5230
5231 // Special Registers
5232 operand rbx_RegI()
5233 %{
5234 constraint(ALLOC_IN_RC(int_rbx_reg));
5235 match(RegI);
5236 match(rRegI);
5237
5238 format %{ "RBX" %}
5239 interface(REG_INTER);
5240 %}
5241
5242 operand rcx_RegI()
5243 %{
5244 constraint(ALLOC_IN_RC(int_rcx_reg));
5245 match(RegI);
5246 match(rRegI);
5247
5248 format %{ "RCX" %}
5249 interface(REG_INTER);
5250 %}
5251
5252 operand rdx_RegI()
5253 %{
5254 constraint(ALLOC_IN_RC(int_rdx_reg));
5255 match(RegI);
5256 match(rRegI);
5257
5258 format %{ "RDX" %}
5259 interface(REG_INTER);
5260 %}
5261
5262 operand rdi_RegI()
5263 %{
5264 constraint(ALLOC_IN_RC(int_rdi_reg));
5265 match(RegI);
5266 match(rRegI);
5267
5268 format %{ "RDI" %}
5269 interface(REG_INTER);
5270 %}
5271
5272 operand no_rax_rdx_RegI()
5273 %{
5274 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5275 match(RegI);
5276 match(rbx_RegI);
5277 match(rcx_RegI);
5278 match(rdi_RegI);
5279
5280 format %{ %}
5281 interface(REG_INTER);
5282 %}
5283
5284 operand no_rbp_r13_RegI()
5285 %{
5286 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5287 match(RegI);
5288 match(rRegI);
5289 match(rax_RegI);
5290 match(rbx_RegI);
5291 match(rcx_RegI);
5292 match(rdx_RegI);
5293 match(rdi_RegI);
5294
5295 format %{ %}
5296 interface(REG_INTER);
5297 %}
5298
5299 // Pointer Register
5300 operand any_RegP()
5301 %{
5302 constraint(ALLOC_IN_RC(any_reg));
5303 match(RegP);
5304 match(rax_RegP);
5305 match(rbx_RegP);
5306 match(rdi_RegP);
5307 match(rsi_RegP);
5308 match(rbp_RegP);
5309 match(r15_RegP);
5310 match(rRegP);
5311
5312 format %{ %}
5313 interface(REG_INTER);
5314 %}
5315
5316 operand rRegP()
5317 %{
5318 constraint(ALLOC_IN_RC(ptr_reg));
5319 match(RegP);
5320 match(rax_RegP);
5321 match(rbx_RegP);
5322 match(rdi_RegP);
5323 match(rsi_RegP);
5324 match(rbp_RegP); // See Q&A below about
5325 match(r15_RegP); // r15_RegP and rbp_RegP.
5326
5327 format %{ %}
5328 interface(REG_INTER);
5329 %}
5330
5331 operand rRegN() %{
5332 constraint(ALLOC_IN_RC(int_reg));
5333 match(RegN);
5334
5335 format %{ %}
5336 interface(REG_INTER);
5337 %}
5338
5339 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5340 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5341 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
5342 // The output of an instruction is controlled by the allocator, which respects
5343 // register class masks, not match rules. Unless an instruction mentions
5344 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5345 // by the allocator as an input.
5346 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
5347 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
5348 // result, RBP is not included in the output of the instruction either.
5349
5350 // This operand is not allowed to use RBP even if
5351 // RBP is not used to hold the frame pointer.
5352 operand no_rbp_RegP()
5353 %{
5354 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5355 match(RegP);
5356 match(rbx_RegP);
5357 match(rsi_RegP);
5358 match(rdi_RegP);
5359
5360 format %{ %}
5361 interface(REG_INTER);
5362 %}
5363
5364 // Special Registers
5365 // Return a pointer value
5366 operand rax_RegP()
5367 %{
5368 constraint(ALLOC_IN_RC(ptr_rax_reg));
5369 match(RegP);
5370 match(rRegP);
5371
5372 format %{ %}
5373 interface(REG_INTER);
5374 %}
5375
5376 // Special Registers
5377 // Return a compressed pointer value
5378 operand rax_RegN()
5379 %{
5380 constraint(ALLOC_IN_RC(int_rax_reg));
5381 match(RegN);
5382 match(rRegN);
5383
5384 format %{ %}
5385 interface(REG_INTER);
5386 %}
5387
5388 // Used in AtomicAdd
5389 operand rbx_RegP()
5390 %{
5391 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5392 match(RegP);
5393 match(rRegP);
5394
5395 format %{ %}
5396 interface(REG_INTER);
5397 %}
5398
5399 operand rsi_RegP()
5400 %{
5401 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5402 match(RegP);
5403 match(rRegP);
5404
5405 format %{ %}
5406 interface(REG_INTER);
5407 %}
5408
5409 operand rbp_RegP()
5410 %{
5411 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5412 match(RegP);
5413 match(rRegP);
5414
5415 format %{ %}
5416 interface(REG_INTER);
5417 %}
5418
5419 // Used in rep stosq
5420 operand rdi_RegP()
5421 %{
5422 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5423 match(RegP);
5424 match(rRegP);
5425
5426 format %{ %}
5427 interface(REG_INTER);
5428 %}
5429
5430 operand r15_RegP()
5431 %{
5432 constraint(ALLOC_IN_RC(ptr_r15_reg));
5433 match(RegP);
5434 match(rRegP);
5435
5436 format %{ %}
5437 interface(REG_INTER);
5438 %}
5439
5440 operand rRegL()
5441 %{
5442 constraint(ALLOC_IN_RC(long_reg));
5443 match(RegL);
5444 match(rax_RegL);
5445 match(rdx_RegL);
5446
5447 format %{ %}
5448 interface(REG_INTER);
5449 %}
5450
5451 // Special Registers
5452 operand no_rax_rdx_RegL()
5453 %{
5454 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5455 match(RegL);
5456 match(rRegL);
5457
5458 format %{ %}
5459 interface(REG_INTER);
5460 %}
5461
5462 operand rax_RegL()
5463 %{
5464 constraint(ALLOC_IN_RC(long_rax_reg));
5465 match(RegL);
5466 match(rRegL);
5467
5468 format %{ "RAX" %}
5469 interface(REG_INTER);
5470 %}
5471
5472 operand rcx_RegL()
5473 %{
5474 constraint(ALLOC_IN_RC(long_rcx_reg));
5475 match(RegL);
5476 match(rRegL);
5477
5478 format %{ %}
5479 interface(REG_INTER);
5480 %}
5481
5482 operand rdx_RegL()
5483 %{
5484 constraint(ALLOC_IN_RC(long_rdx_reg));
5485 match(RegL);
5486 match(rRegL);
5487
5488 format %{ %}
5489 interface(REG_INTER);
5490 %}
5491
5492 operand r11_RegL()
5493 %{
5494 constraint(ALLOC_IN_RC(long_r11_reg));
5495 match(RegL);
5496 match(rRegL);
5497
5498 format %{ %}
5499 interface(REG_INTER);
5500 %}
5501
5502 operand no_rbp_r13_RegL()
5503 %{
5504 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5505 match(RegL);
5506 match(rRegL);
5507 match(rax_RegL);
5508 match(rcx_RegL);
5509 match(rdx_RegL);
5510
5511 format %{ %}
5512 interface(REG_INTER);
5513 %}
5514
5515 // Flags register, used as output of compare instructions
5516 operand rFlagsReg()
5517 %{
5518 constraint(ALLOC_IN_RC(int_flags));
5519 match(RegFlags);
5520
5521 format %{ "RFLAGS" %}
5522 interface(REG_INTER);
5523 %}
5524
5525 // Flags register, used as output of FLOATING POINT compare instructions
5526 operand rFlagsRegU()
5527 %{
5528 constraint(ALLOC_IN_RC(int_flags));
5529 match(RegFlags);
5530
5531 format %{ "RFLAGS_U" %}
5532 interface(REG_INTER);
5533 %}
5534
5535 operand rFlagsRegUCF() %{
5536 constraint(ALLOC_IN_RC(int_flags));
5537 match(RegFlags);
5538 predicate(!UseAPX || !VM_Version::supports_avx10_2());
5539
5540 format %{ "RFLAGS_U_CF" %}
5541 interface(REG_INTER);
5542 %}
5543
5544 operand rFlagsRegUCFE() %{
5545 constraint(ALLOC_IN_RC(int_flags));
5546 match(RegFlags);
5547 predicate(UseAPX && VM_Version::supports_avx10_2());
5548
5549 format %{ "RFLAGS_U_CFE" %}
5550 interface(REG_INTER);
5551 %}
5552
5553 // Float register operands
5554 operand regF() %{
5555 constraint(ALLOC_IN_RC(float_reg));
5556 match(RegF);
5557
5558 format %{ %}
5559 interface(REG_INTER);
5560 %}
5561
5562 // Float register operands
5563 operand legRegF() %{
5564 constraint(ALLOC_IN_RC(float_reg_legacy));
5565 match(RegF);
5566
5567 format %{ %}
5568 interface(REG_INTER);
5569 %}
5570
5571 // Float register operands
5572 operand vlRegF() %{
5573 constraint(ALLOC_IN_RC(float_reg_vl));
5574 match(RegF);
5575
5576 format %{ %}
5577 interface(REG_INTER);
5578 %}
5579
5580 // Double register operands
5581 operand regD() %{
5582 constraint(ALLOC_IN_RC(double_reg));
5583 match(RegD);
5584
5585 format %{ %}
5586 interface(REG_INTER);
5587 %}
5588
5589 // Double register operands
5590 operand legRegD() %{
5591 constraint(ALLOC_IN_RC(double_reg_legacy));
5592 match(RegD);
5593
5594 format %{ %}
5595 interface(REG_INTER);
5596 %}
5597
5598 // Double register operands
5599 operand vlRegD() %{
5600 constraint(ALLOC_IN_RC(double_reg_vl));
5601 match(RegD);
5602
5603 format %{ %}
5604 interface(REG_INTER);
5605 %}
5606
5607 //----------Memory Operands----------------------------------------------------
5608 // Direct Memory Operand
5609 // operand direct(immP addr)
5610 // %{
5611 // match(addr);
5612
5613 // format %{ "[$addr]" %}
5614 // interface(MEMORY_INTER) %{
5615 // base(0xFFFFFFFF);
5616 // index(0x4);
5617 // scale(0x0);
5618 // disp($addr);
5619 // %}
5620 // %}
5621
5622 // Indirect Memory Operand
5623 operand indirect(any_RegP reg)
5624 %{
5625 constraint(ALLOC_IN_RC(ptr_reg));
5626 match(reg);
5627
5628 format %{ "[$reg]" %}
5629 interface(MEMORY_INTER) %{
5630 base($reg);
5631 index(0x4);
5632 scale(0x0);
5633 disp(0x0);
5634 %}
5635 %}
5636
5637 // Indirect Memory Plus Short Offset Operand
5638 operand indOffset8(any_RegP reg, immL8 off)
5639 %{
5640 constraint(ALLOC_IN_RC(ptr_reg));
5641 match(AddP reg off);
5642
5643 format %{ "[$reg + $off (8-bit)]" %}
5644 interface(MEMORY_INTER) %{
5645 base($reg);
5646 index(0x4);
5647 scale(0x0);
5648 disp($off);
5649 %}
5650 %}
5651
5652 // Indirect Memory Plus Long Offset Operand
5653 operand indOffset32(any_RegP reg, immL32 off)
5654 %{
5655 constraint(ALLOC_IN_RC(ptr_reg));
5656 match(AddP reg off);
5657
5658 format %{ "[$reg + $off (32-bit)]" %}
5659 interface(MEMORY_INTER) %{
5660 base($reg);
5661 index(0x4);
5662 scale(0x0);
5663 disp($off);
5664 %}
5665 %}
5666
5667 // Indirect Memory Plus Index Register Plus Offset Operand
5668 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5669 %{
5670 constraint(ALLOC_IN_RC(ptr_reg));
5671 match(AddP (AddP reg lreg) off);
5672
5673 op_cost(10);
5674 format %{"[$reg + $off + $lreg]" %}
5675 interface(MEMORY_INTER) %{
5676 base($reg);
5677 index($lreg);
5678 scale(0x0);
5679 disp($off);
5680 %}
5681 %}
5682
// Indirect Memory Plus Index Register Operand
5684 operand indIndex(any_RegP reg, rRegL lreg)
5685 %{
5686 constraint(ALLOC_IN_RC(ptr_reg));
5687 match(AddP reg lreg);
5688
5689 op_cost(10);
5690 format %{"[$reg + $lreg]" %}
5691 interface(MEMORY_INTER) %{
5692 base($reg);
5693 index($lreg);
5694 scale(0x0);
5695 disp(0x0);
5696 %}
5697 %}
5698
5699 // Indirect Memory Times Scale Plus Index Register
5700 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5701 %{
5702 constraint(ALLOC_IN_RC(ptr_reg));
5703 match(AddP reg (LShiftL lreg scale));
5704
5705 op_cost(10);
5706 format %{"[$reg + $lreg << $scale]" %}
5707 interface(MEMORY_INTER) %{
5708 base($reg);
5709 index($lreg);
5710 scale($scale);
5711 disp(0x0);
5712 %}
5713 %}
5714
5715 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5716 %{
5717 constraint(ALLOC_IN_RC(ptr_reg));
5718 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5719 match(AddP reg (LShiftL (ConvI2L idx) scale));
5720
5721 op_cost(10);
5722 format %{"[$reg + pos $idx << $scale]" %}
5723 interface(MEMORY_INTER) %{
5724 base($reg);
5725 index($idx);
5726 scale($scale);
5727 disp(0x0);
5728 %}
5729 %}
5730
5731 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5732 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5733 %{
5734 constraint(ALLOC_IN_RC(ptr_reg));
5735 match(AddP (AddP reg (LShiftL lreg scale)) off);
5736
5737 op_cost(10);
5738 format %{"[$reg + $off + $lreg << $scale]" %}
5739 interface(MEMORY_INTER) %{
5740 base($reg);
5741 index($lreg);
5742 scale($scale);
5743 disp($off);
5744 %}
5745 %}
5746
5747 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5748 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5749 %{
5750 constraint(ALLOC_IN_RC(ptr_reg));
5751 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5752 match(AddP (AddP reg (ConvI2L idx)) off);
5753
5754 op_cost(10);
5755 format %{"[$reg + $off + $idx]" %}
5756 interface(MEMORY_INTER) %{
5757 base($reg);
5758 index($idx);
5759 scale(0x0);
5760 disp($off);
5761 %}
5762 %}
5763
5764 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5765 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5766 %{
5767 constraint(ALLOC_IN_RC(ptr_reg));
5768 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5769 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5770
5771 op_cost(10);
5772 format %{"[$reg + $off + $idx << $scale]" %}
5773 interface(MEMORY_INTER) %{
5774 base($reg);
5775 index($idx);
5776 scale($scale);
5777 disp($off);
5778 %}
5779 %}
5780
5781 // Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base,
// so we can't free r12 even with CompressedOops::base() == nullptr.
5784 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5785 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5786 constraint(ALLOC_IN_RC(ptr_reg));
5787 match(AddP (DecodeN reg) off);
5788
5789 op_cost(10);
5790 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5791 interface(MEMORY_INTER) %{
5792 base(0xc); // R12
5793 index($reg);
5794 scale(0x3);
5795 disp($off);
5796 %}
5797 %}
5798
5799 // Indirect Memory Operand
5800 operand indirectNarrow(rRegN reg)
5801 %{
5802 predicate(CompressedOops::shift() == 0);
5803 constraint(ALLOC_IN_RC(ptr_reg));
5804 match(DecodeN reg);
5805
5806 format %{ "[$reg]" %}
5807 interface(MEMORY_INTER) %{
5808 base($reg);
5809 index(0x4);
5810 scale(0x0);
5811 disp(0x0);
5812 %}
5813 %}
5814
5815 // Indirect Memory Plus Short Offset Operand
5816 operand indOffset8Narrow(rRegN reg, immL8 off)
5817 %{
5818 predicate(CompressedOops::shift() == 0);
5819 constraint(ALLOC_IN_RC(ptr_reg));
5820 match(AddP (DecodeN reg) off);
5821
5822 format %{ "[$reg + $off (8-bit)]" %}
5823 interface(MEMORY_INTER) %{
5824 base($reg);
5825 index(0x4);
5826 scale(0x0);
5827 disp($off);
5828 %}
5829 %}
5830
5831 // Indirect Memory Plus Long Offset Operand
5832 operand indOffset32Narrow(rRegN reg, immL32 off)
5833 %{
5834 predicate(CompressedOops::shift() == 0);
5835 constraint(ALLOC_IN_RC(ptr_reg));
5836 match(AddP (DecodeN reg) off);
5837
5838 format %{ "[$reg + $off (32-bit)]" %}
5839 interface(MEMORY_INTER) %{
5840 base($reg);
5841 index(0x4);
5842 scale(0x0);
5843 disp($off);
5844 %}
5845 %}
5846
5847 // Indirect Memory Plus Index Register Plus Offset Operand
5848 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5849 %{
5850 predicate(CompressedOops::shift() == 0);
5851 constraint(ALLOC_IN_RC(ptr_reg));
5852 match(AddP (AddP (DecodeN reg) lreg) off);
5853
5854 op_cost(10);
5855 format %{"[$reg + $off + $lreg]" %}
5856 interface(MEMORY_INTER) %{
5857 base($reg);
5858 index($lreg);
5859 scale(0x0);
5860 disp($off);
5861 %}
5862 %}
5863
// Indirect Memory Plus Index Register Operand
5865 operand indIndexNarrow(rRegN reg, rRegL lreg)
5866 %{
5867 predicate(CompressedOops::shift() == 0);
5868 constraint(ALLOC_IN_RC(ptr_reg));
5869 match(AddP (DecodeN reg) lreg);
5870
5871 op_cost(10);
5872 format %{"[$reg + $lreg]" %}
5873 interface(MEMORY_INTER) %{
5874 base($reg);
5875 index($lreg);
5876 scale(0x0);
5877 disp(0x0);
5878 %}
5879 %}
5880
5881 // Indirect Memory Times Scale Plus Index Register
5882 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5883 %{
5884 predicate(CompressedOops::shift() == 0);
5885 constraint(ALLOC_IN_RC(ptr_reg));
5886 match(AddP (DecodeN reg) (LShiftL lreg scale));
5887
5888 op_cost(10);
5889 format %{"[$reg + $lreg << $scale]" %}
5890 interface(MEMORY_INTER) %{
5891 base($reg);
5892 index($lreg);
5893 scale($scale);
5894 disp(0x0);
5895 %}
5896 %}
5897
5898 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5899 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5900 %{
5901 predicate(CompressedOops::shift() == 0);
5902 constraint(ALLOC_IN_RC(ptr_reg));
5903 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5904
5905 op_cost(10);
5906 format %{"[$reg + $off + $lreg << $scale]" %}
5907 interface(MEMORY_INTER) %{
5908 base($reg);
5909 index($lreg);
5910 scale($scale);
5911 disp($off);
5912 %}
5913 %}
5914
// Indirect Memory Plus Positive Index Register Plus Offset Operand
5916 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5917 %{
5918 constraint(ALLOC_IN_RC(ptr_reg));
5919 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5920 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5921
5922 op_cost(10);
5923 format %{"[$reg + $off + $idx]" %}
5924 interface(MEMORY_INTER) %{
5925 base($reg);
5926 index($idx);
5927 scale(0x0);
5928 disp($off);
5929 %}
5930 %}
5931
5932 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5933 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5934 %{
5935 constraint(ALLOC_IN_RC(ptr_reg));
5936 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5937 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5938
5939 op_cost(10);
5940 format %{"[$reg + $off + $idx << $scale]" %}
5941 interface(MEMORY_INTER) %{
5942 base($reg);
5943 index($idx);
5944 scale($scale);
5945 disp($off);
5946 %}
5947 %}
5948
5949 //----------Special Memory Operands--------------------------------------------
5950 // Stack Slot Operand - This operand is used for loading and storing temporary
5951 // values on the stack where a match requires a value to
5952 // flow through memory.
5953 operand stackSlotP(sRegP reg)
5954 %{
5955 constraint(ALLOC_IN_RC(stack_slots));
5956 // No match rule because this operand is only generated in matching
5957
5958 format %{ "[$reg]" %}
5959 interface(MEMORY_INTER) %{
5960 base(0x4); // RSP
5961 index(0x4); // No Index
5962 scale(0x0); // No Scale
5963 disp($reg); // Stack Offset
5964 %}
5965 %}
5966
5967 operand stackSlotI(sRegI reg)
5968 %{
5969 constraint(ALLOC_IN_RC(stack_slots));
5970 // No match rule because this operand is only generated in matching
5971
5972 format %{ "[$reg]" %}
5973 interface(MEMORY_INTER) %{
5974 base(0x4); // RSP
5975 index(0x4); // No Index
5976 scale(0x0); // No Scale
5977 disp($reg); // Stack Offset
5978 %}
5979 %}
5980
5981 operand stackSlotF(sRegF reg)
5982 %{
5983 constraint(ALLOC_IN_RC(stack_slots));
5984 // No match rule because this operand is only generated in matching
5985
5986 format %{ "[$reg]" %}
5987 interface(MEMORY_INTER) %{
5988 base(0x4); // RSP
5989 index(0x4); // No Index
5990 scale(0x0); // No Scale
5991 disp($reg); // Stack Offset
5992 %}
5993 %}
5994
5995 operand stackSlotD(sRegD reg)
5996 %{
5997 constraint(ALLOC_IN_RC(stack_slots));
5998 // No match rule because this operand is only generated in matching
5999
6000 format %{ "[$reg]" %}
6001 interface(MEMORY_INTER) %{
6002 base(0x4); // RSP
6003 index(0x4); // No Index
6004 scale(0x0); // No Scale
6005 disp($reg); // Stack Offset
6006 %}
6007 %}
6008 operand stackSlotL(sRegL reg)
6009 %{
6010 constraint(ALLOC_IN_RC(stack_slots));
6011 // No match rule because this operand is only generated in matching
6012
6013 format %{ "[$reg]" %}
6014 interface(MEMORY_INTER) %{
6015 base(0x4); // RSP
6016 index(0x4); // No Index
6017 scale(0x0); // No Scale
6018 disp($reg); // Stack Offset
6019 %}
6020 %}
6021
6022 //----------Conditional Branch Operands----------------------------------------
6023 // Comparison Op - This is the operation of the comparison, and is limited to
6024 // the following set of codes:
6025 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6026 //
6027 // Other attributes of the comparison, such as unsignedness, are specified
6028 // by the comparison instruction that sets a condition code flags register.
6029 // That result is represented by a flags operand whose subtype is appropriate
6030 // to the unsignedness (etc.) of the comparison.
6031 //
6032 // Later, the instruction which matches both the Comparison Op (a Bool) and
6033 // the flags (produced by the Cmp) specifies the coding of the comparison op
6034 // by matching a specific subtype of Bool operand below, such as cmpOpU.
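//
// Illustrative sketch only (hypothetical rule, not one of this file's
// instructions; exampleJmpCon is a made-up name): such an instruction matches
// the Bool subtype together with the flags register and pulls the condition
// bits out of the operand's COND_INTER encoding.
//
//   instruct exampleJmpCon(cmpOp cop, rFlagsReg cr, label labl) %{
//     match(If cop cr);
//     effect(USE labl);
//     format %{ "j$cop   $labl" %}
//     ins_encode %{
//       Label* L = $labl$$label;
//       __ jcc((Assembler::Condition)$cop$$cmpcode, *L);
//     %}
//     ins_pipe(pipe_jcc);
//   %}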
6035
6036 // Comparison Code
6037 operand cmpOp()
6038 %{
6039 match(Bool);
6040
6041 format %{ "" %}
6042 interface(COND_INTER) %{
6043 equal(0x4, "e");
6044 not_equal(0x5, "ne");
6045 less(0xc, "l");
6046 greater_equal(0xd, "ge");
6047 less_equal(0xe, "le");
6048 greater(0xf, "g");
6049 overflow(0x0, "o");
6050 no_overflow(0x1, "no");
6051 %}
6052 %}
6053
6054 // Comparison Code, unsigned compare. Used by FP also, with
6055 // C2 (unordered) turned into GT or LT already. The other bits
6056 // C0 and C3 are turned into Carry & Zero flags.
6057 operand cmpOpU()
6058 %{
6059 match(Bool);
6060
6061 format %{ "" %}
6062 interface(COND_INTER) %{
6063 equal(0x4, "e");
6064 not_equal(0x5, "ne");
6065 less(0x2, "b");
6066 greater_equal(0x3, "ae");
6067 less_equal(0x6, "be");
6068 greater(0x7, "a");
6069 overflow(0x0, "o");
6070 no_overflow(0x1, "no");
6071 %}
6072 %}
6073
6074
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
6077 // don't need to use cmpOpUCF2 for eq/ne
6078 operand cmpOpUCF() %{
6079 match(Bool);
6080 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6081 (n->as_Bool()->_test._test == BoolTest::lt ||
6082 n->as_Bool()->_test._test == BoolTest::ge ||
6083 n->as_Bool()->_test._test == BoolTest::le ||
6084 n->as_Bool()->_test._test == BoolTest::gt ||
6085 n->in(1)->in(1) == n->in(1)->in(2)));
6086 format %{ "" %}
6087 interface(COND_INTER) %{
6088 equal(0xb, "np");
6089 not_equal(0xa, "p");
6090 less(0x2, "b");
6091 greater_equal(0x3, "ae");
6092 less_equal(0x6, "be");
6093 greater(0x7, "a");
6094 overflow(0x0, "o");
6095 no_overflow(0x1, "no");
6096 %}
6097 %}
6098
6099
6100 // Floating comparisons that can be fixed up with extra conditional jumps
6101 operand cmpOpUCF2() %{
6102 match(Bool);
6103 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6104 (n->as_Bool()->_test._test == BoolTest::ne ||
6105 n->as_Bool()->_test._test == BoolTest::eq) &&
6106 n->in(1)->in(1) != n->in(1)->in(2));
6107 format %{ "" %}
6108 interface(COND_INTER) %{
6109 equal(0x4, "e");
6110 not_equal(0x5, "ne");
6111 less(0x2, "b");
6112 greater_equal(0x3, "ae");
6113 less_equal(0x6, "be");
6114 greater(0x7, "a");
6115 overflow(0x0, "o");
6116 no_overflow(0x1, "no");
6117 %}
6118 %}
6119
6120
// Floating point comparisons that set condition flags to test more directly.
// Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
6123 // are used for L (<) and LE (<=) conditions. It's important to convert these
6124 // latter conditions to ones that use unsigned tests before passing into an
6125 // instruction because the preceding comparison might be based on a three way
6126 // comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
6127 operand cmpOpUCFE()
6128 %{
6129 match(Bool);
6130 predicate((UseAPX && VM_Version::supports_avx10_2()) &&
6131 (n->as_Bool()->_test._test == BoolTest::ne ||
6132 n->as_Bool()->_test._test == BoolTest::eq ||
6133 n->as_Bool()->_test._test == BoolTest::lt ||
6134 n->as_Bool()->_test._test == BoolTest::ge ||
6135 n->as_Bool()->_test._test == BoolTest::le ||
6136 n->as_Bool()->_test._test == BoolTest::gt));
6137
6138 format %{ "" %}
6139 interface(COND_INTER) %{
6140 equal(0x4, "e");
6141 not_equal(0x5, "ne");
6142 less(0x2, "b");
6143 greater_equal(0x3, "ae");
6144 less_equal(0x6, "be");
6145 greater(0x7, "a");
6146 overflow(0x0, "o");
6147 no_overflow(0x1, "no");
6148 %}
6149 %}
6150
// Operands for bound floating point register arguments
6152 operand rxmm0() %{
6153 constraint(ALLOC_IN_RC(xmm0_reg));
6154 match(VecX);
6155 format%{%}
6156 interface(REG_INTER);
6157 %}
6158
6159 // Vectors
6160
6161 // Dummy generic vector class. Should be used for all vector operands.
6162 // Replaced with vec[SDXYZ] during post-selection pass.
6163 operand vec() %{
6164 constraint(ALLOC_IN_RC(dynamic));
6165 match(VecX);
6166 match(VecY);
6167 match(VecZ);
6168 match(VecS);
6169 match(VecD);
6170
6171 format %{ %}
6172 interface(REG_INTER);
6173 %}
6174
6175 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6176 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6177 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6178 // runtime code generation via reg_class_dynamic.
6179 operand legVec() %{
6180 constraint(ALLOC_IN_RC(dynamic));
6181 match(VecX);
6182 match(VecY);
6183 match(VecZ);
6184 match(VecS);
6185 match(VecD);
6186
6187 format %{ %}
6188 interface(REG_INTER);
6189 %}
6190
6191 // Replaces vec during post-selection cleanup. See above.
6192 operand vecS() %{
6193 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6194 match(VecS);
6195
6196 format %{ %}
6197 interface(REG_INTER);
6198 %}
6199
6200 // Replaces legVec during post-selection cleanup. See above.
6201 operand legVecS() %{
6202 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6203 match(VecS);
6204
6205 format %{ %}
6206 interface(REG_INTER);
6207 %}
6208
6209 // Replaces vec during post-selection cleanup. See above.
6210 operand vecD() %{
6211 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6212 match(VecD);
6213
6214 format %{ %}
6215 interface(REG_INTER);
6216 %}
6217
6218 // Replaces legVec during post-selection cleanup. See above.
6219 operand legVecD() %{
6220 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6221 match(VecD);
6222
6223 format %{ %}
6224 interface(REG_INTER);
6225 %}
6226
6227 // Replaces vec during post-selection cleanup. See above.
6228 operand vecX() %{
6229 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6230 match(VecX);
6231
6232 format %{ %}
6233 interface(REG_INTER);
6234 %}
6235
6236 // Replaces legVec during post-selection cleanup. See above.
6237 operand legVecX() %{
6238 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6239 match(VecX);
6240
6241 format %{ %}
6242 interface(REG_INTER);
6243 %}
6244
6245 // Replaces vec during post-selection cleanup. See above.
6246 operand vecY() %{
6247 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6248 match(VecY);
6249
6250 format %{ %}
6251 interface(REG_INTER);
6252 %}
6253
6254 // Replaces legVec during post-selection cleanup. See above.
6255 operand legVecY() %{
6256 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6257 match(VecY);
6258
6259 format %{ %}
6260 interface(REG_INTER);
6261 %}
6262
6263 // Replaces vec during post-selection cleanup. See above.
6264 operand vecZ() %{
6265 constraint(ALLOC_IN_RC(vectorz_reg));
6266 match(VecZ);
6267
6268 format %{ %}
6269 interface(REG_INTER);
6270 %}
6271
6272 // Replaces legVec during post-selection cleanup. See above.
6273 operand legVecZ() %{
6274 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6275 match(VecZ);
6276
6277 format %{ %}
6278 interface(REG_INTER);
6279 %}
6280
6281 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The
// classic case of this is memory operands; an illustrative (hypothetical)
// use of the memory opclass is sketched after its definition below.
6287
6288 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6289 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6290 indCompressedOopOffset,
6291 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6292 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6293 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
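
// Illustrative sketch only (hypothetical rule, not one of this file's
// instructions; exampleLoadI is a made-up name): a single rule written
// against the memory opclass above covers every addressing mode it lists.
//
//   instruct exampleLoadI(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));
//     format %{ "movl    $dst, $mem" %}
//     ins_encode %{
//       __ movl($dst$$Register, $mem$$Address);
//     %}
//     ins_pipe(ialu_reg_mem);
//   %}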
6294
6295 //----------PIPELINE-----------------------------------------------------------
6296 // Rules which define the behavior of the target architectures pipeline.
6297 pipeline %{
6298
6299 //----------ATTRIBUTES---------------------------------------------------------
6300 attributes %{
    variable_size_instructions;        // Variable-sized instructions
6302 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
    instruction_unit_size = 1;         // An instruction is 1 byte long
6304 instruction_fetch_unit_size = 16; // The processor fetches one line
6305 instruction_fetch_units = 1; // of 16 bytes
6306 %}
6307
6308 //----------RESOURCES----------------------------------------------------------
6309 // Resources are the functional units available to the machine
6310
6311 // Generic P2/P3 pipeline
6312 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6313 // 3 instructions decoded per cycle.
6314 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops, only ALU0 handles mul instructions.
6316 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6317 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6318 BR, FPU,
6319 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6320
6321 //----------PIPELINE DESCRIPTION-----------------------------------------------
6322 // Pipeline Description specifies the stages in the machine's pipeline
6323
6324 // Generic P2/P3 pipeline
6325 pipe_desc(S0, S1, S2, S3, S4, S5);
6326
6327 //----------PIPELINE CLASSES---------------------------------------------------
6328 // Pipeline Classes describe the stages in which input and output are
6329 // referenced by the hardware pipeline.
6330
6331 // Naming convention: ialu or fpu
6332 // Then: _reg
6333 // Then: _reg if there is a 2nd register
6334 // Then: _long if it's a pair of instructions implementing a long
6335 // Then: _fat if it requires the big decoder
6336 // Or: _mem if it requires the big decoder and a memory unit.
6337
6338 // Integer ALU reg operation
6339 pipe_class ialu_reg(rRegI dst)
6340 %{
6341 single_instruction;
6342 dst : S4(write);
6343 dst : S3(read);
6344 DECODE : S0; // any decoder
6345 ALU : S3; // any alu
6346 %}
6347
6348 // Long ALU reg operation
6349 pipe_class ialu_reg_long(rRegL dst)
6350 %{
6351 instruction_count(2);
6352 dst : S4(write);
6353 dst : S3(read);
6354 DECODE : S0(2); // any 2 decoders
6355 ALU : S3(2); // both alus
6356 %}
6357
6358 // Integer ALU reg operation using big decoder
6359 pipe_class ialu_reg_fat(rRegI dst)
6360 %{
6361 single_instruction;
6362 dst : S4(write);
6363 dst : S3(read);
6364 D0 : S0; // big decoder only
6365 ALU : S3; // any alu
6366 %}
6367
6368 // Integer ALU reg-reg operation
6369 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6370 %{
6371 single_instruction;
6372 dst : S4(write);
6373 src : S3(read);
6374 DECODE : S0; // any decoder
6375 ALU : S3; // any alu
6376 %}
6377
6378 // Integer ALU reg-reg operation
6379 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6380 %{
6381 single_instruction;
6382 dst : S4(write);
6383 src : S3(read);
6384 D0 : S0; // big decoder only
6385 ALU : S3; // any alu
6386 %}
6387
6388 // Integer ALU reg-mem operation
6389 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6390 %{
6391 single_instruction;
6392 dst : S5(write);
6393 mem : S3(read);
6394 D0 : S0; // big decoder only
6395 ALU : S4; // any alu
6396 MEM : S3; // any mem
6397 %}
6398
6399 // Integer mem operation (prefetch)
6400 pipe_class ialu_mem(memory mem)
6401 %{
6402 single_instruction;
6403 mem : S3(read);
6404 D0 : S0; // big decoder only
6405 MEM : S3; // any mem
6406 %}
6407
6408 // Integer Store to Memory
6409 pipe_class ialu_mem_reg(memory mem, rRegI src)
6410 %{
6411 single_instruction;
6412 mem : S3(read);
6413 src : S5(read);
6414 D0 : S0; // big decoder only
6415 ALU : S4; // any alu
6416 MEM : S3;
6417 %}
6418
6419 // // Long Store to Memory
6420 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6421 // %{
6422 // instruction_count(2);
6423 // mem : S3(read);
6424 // src : S5(read);
6425 // D0 : S0(2); // big decoder only; twice
6426 // ALU : S4(2); // any 2 alus
6427 // MEM : S3(2); // Both mems
6428 // %}
6429
6430 // Integer Store to Memory
6431 pipe_class ialu_mem_imm(memory mem)
6432 %{
6433 single_instruction;
6434 mem : S3(read);
6435 D0 : S0; // big decoder only
6436 ALU : S4; // any alu
6437 MEM : S3;
6438 %}
6439
6440 // Integer ALU0 reg-reg operation
6441 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6442 %{
6443 single_instruction;
6444 dst : S4(write);
6445 src : S3(read);
6446 D0 : S0; // Big decoder only
6447 ALU0 : S3; // only alu0
6448 %}
6449
6450 // Integer ALU0 reg-mem operation
6451 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6452 %{
6453 single_instruction;
6454 dst : S5(write);
6455 mem : S3(read);
6456 D0 : S0; // big decoder only
6457 ALU0 : S4; // ALU0 only
6458 MEM : S3; // any mem
6459 %}
6460
6461 // Integer ALU reg-reg operation
6462 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6463 %{
6464 single_instruction;
6465 cr : S4(write);
6466 src1 : S3(read);
6467 src2 : S3(read);
6468 DECODE : S0; // any decoder
6469 ALU : S3; // any alu
6470 %}
6471
6472 // Integer ALU reg-imm operation
6473 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6474 %{
6475 single_instruction;
6476 cr : S4(write);
6477 src1 : S3(read);
6478 DECODE : S0; // any decoder
6479 ALU : S3; // any alu
6480 %}
6481
6482 // Integer ALU reg-mem operation
6483 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6484 %{
6485 single_instruction;
6486 cr : S4(write);
6487 src1 : S3(read);
6488 src2 : S3(read);
6489 D0 : S0; // big decoder only
6490 ALU : S4; // any alu
6491 MEM : S3;
6492 %}
6493
6494 // Conditional move reg-reg
6495 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6496 %{
6497 instruction_count(4);
6498 y : S4(read);
6499 q : S3(read);
6500 p : S3(read);
6501 DECODE : S0(4); // any decoder
6502 %}
6503
6504 // Conditional move reg-reg
6505 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6506 %{
6507 single_instruction;
6508 dst : S4(write);
6509 src : S3(read);
6510 cr : S3(read);
6511 DECODE : S0; // any decoder
6512 %}
6513
6514 // Conditional move reg-mem
6515 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6516 %{
6517 single_instruction;
6518 dst : S4(write);
6519 src : S3(read);
6520 cr : S3(read);
6521 DECODE : S0; // any decoder
6522 MEM : S3;
6523 %}
6524
6525 // Conditional move reg-reg long
6526 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6527 %{
6528 single_instruction;
6529 dst : S4(write);
6530 src : S3(read);
6531 cr : S3(read);
6532 DECODE : S0(2); // any 2 decoders
6533 %}
6534
6535 // Float reg-reg operation
6536 pipe_class fpu_reg(regD dst)
6537 %{
6538 instruction_count(2);
6539 dst : S3(read);
6540 DECODE : S0(2); // any 2 decoders
6541 FPU : S3;
6542 %}
6543
6544 // Float reg-reg operation
6545 pipe_class fpu_reg_reg(regD dst, regD src)
6546 %{
6547 instruction_count(2);
6548 dst : S4(write);
6549 src : S3(read);
6550 DECODE : S0(2); // any 2 decoders
6551 FPU : S3;
6552 %}
6553
6554 // Float reg-reg operation
6555 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6556 %{
6557 instruction_count(3);
6558 dst : S4(write);
6559 src1 : S3(read);
6560 src2 : S3(read);
6561 DECODE : S0(3); // any 3 decoders
6562 FPU : S3(2);
6563 %}
6564
6565 // Float reg-reg operation
6566 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6567 %{
6568 instruction_count(4);
6569 dst : S4(write);
6570 src1 : S3(read);
6571 src2 : S3(read);
6572 src3 : S3(read);
    DECODE : S0(4);    // any decoder
6574 FPU : S3(2);
6575 %}
6576
6577 // Float reg-reg operation
6578 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6579 %{
6580 instruction_count(4);
6581 dst : S4(write);
6582 src1 : S3(read);
6583 src2 : S3(read);
6584 src3 : S3(read);
6585 DECODE : S1(3); // any 3 decoders
6586 D0 : S0; // Big decoder only
6587 FPU : S3(2);
6588 MEM : S3;
6589 %}
6590
6591 // Float reg-mem operation
6592 pipe_class fpu_reg_mem(regD dst, memory mem)
6593 %{
6594 instruction_count(2);
6595 dst : S5(write);
6596 mem : S3(read);
6597 D0 : S0; // big decoder only
6598 DECODE : S1; // any decoder for FPU POP
6599 FPU : S4;
6600 MEM : S3; // any mem
6601 %}
6602
6603 // Float reg-mem operation
6604 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6605 %{
6606 instruction_count(3);
6607 dst : S5(write);
6608 src1 : S3(read);
6609 mem : S3(read);
6610 D0 : S0; // big decoder only
6611 DECODE : S1(2); // any decoder for FPU POP
6612 FPU : S4;
6613 MEM : S3; // any mem
6614 %}
6615
6616 // Float mem-reg operation
6617 pipe_class fpu_mem_reg(memory mem, regD src)
6618 %{
6619 instruction_count(2);
6620 src : S5(read);
6621 mem : S3(read);
6622 DECODE : S0; // any decoder for FPU PUSH
6623 D0 : S1; // big decoder only
6624 FPU : S4;
6625 MEM : S3; // any mem
6626 %}
6627
6628 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6629 %{
6630 instruction_count(3);
6631 src1 : S3(read);
6632 src2 : S3(read);
6633 mem : S3(read);
6634 DECODE : S0(2); // any decoder for FPU PUSH
6635 D0 : S1; // big decoder only
6636 FPU : S4;
6637 MEM : S3; // any mem
6638 %}
6639
6640 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6641 %{
6642 instruction_count(3);
6643 src1 : S3(read);
6644 src2 : S3(read);
6645 mem : S4(read);
6646 DECODE : S0; // any decoder for FPU PUSH
6647 D0 : S0(2); // big decoder only
6648 FPU : S4;
6649 MEM : S3(2); // any mem
6650 %}
6651
6652 pipe_class fpu_mem_mem(memory dst, memory src1)
6653 %{
6654 instruction_count(2);
6655 src1 : S3(read);
6656 dst : S4(read);
6657 D0 : S0(2); // big decoder only
6658 MEM : S3(2); // any mem
6659 %}
6660
6661 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6662 %{
6663 instruction_count(3);
6664 src1 : S3(read);
6665 src2 : S3(read);
6666 dst : S4(read);
6667 D0 : S0(3); // big decoder only
6668 FPU : S4;
6669 MEM : S3(3); // any mem
6670 %}
6671
6672 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6673 %{
6674 instruction_count(3);
6675 src1 : S4(read);
6676 mem : S4(read);
6677 DECODE : S0; // any decoder for FPU PUSH
6678 D0 : S0(2); // big decoder only
6679 FPU : S4;
6680 MEM : S3(2); // any mem
6681 %}
6682
6683 // Float load constant
6684 pipe_class fpu_reg_con(regD dst)
6685 %{
6686 instruction_count(2);
6687 dst : S5(write);
6688 D0 : S0; // big decoder only for the load
6689 DECODE : S1; // any decoder for FPU POP
6690 FPU : S4;
6691 MEM : S3; // any mem
6692 %}
6693
6694 // Float load constant
6695 pipe_class fpu_reg_reg_con(regD dst, regD src)
6696 %{
6697 instruction_count(3);
6698 dst : S5(write);
6699 src : S3(read);
6700 D0 : S0; // big decoder only for the load
6701 DECODE : S1(2); // any decoder for FPU POP
6702 FPU : S4;
6703 MEM : S3; // any mem
6704 %}
6705
6706 // Unconditional branch
6707 pipe_class pipe_jmp(label labl)
6708 %{
6709 single_instruction;
6710 BR : S3;
6711 %}
6712
6713 // Conditional branch
6714 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6715 %{
6716 single_instruction;
6717 cr : S1(read);
6718 BR : S3;
6719 %}
6720
6721 // Allocation idiom
6722 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6723 %{
6724 instruction_count(1); force_serialization;
6725 fixed_latency(6);
6726 heap_ptr : S3(read);
6727 DECODE : S0(3);
6728 D0 : S2;
6729 MEM : S3;
6730 ALU : S3(2);
6731 dst : S5(write);
6732 BR : S5;
6733 %}
6734
6735 // Generic big/slow expanded idiom
6736 pipe_class pipe_slow()
6737 %{
6738 instruction_count(10); multiple_bundles; force_serialization;
6739 fixed_latency(100);
6740 D0 : S0(2);
6741 MEM : S3(2);
6742 %}
6743
6744 // The real do-nothing guy
6745 pipe_class empty()
6746 %{
6747 instruction_count(0);
6748 %}
6749
6750 // Define the class for the Nop node
6751 define
6752 %{
6753 MachNop = empty;
6754 %}
6755
6756 %}
6757
6758 //----------INSTRUCTIONS-------------------------------------------------------
6759 //
6760 // match -- States which machine-independent subtree may be replaced
6761 // by this instruction.
6762 // ins_cost -- The estimated cost of this instruction is used by instruction
6763 // selection to identify a minimum cost tree of machine
6764 // instructions that matches a tree of machine-independent
6765 // instructions.
6766 // format -- A string providing the disassembly for this instruction.
6767 // The value of an instruction's operand may be inserted
6768 // by referring to it with a '$' prefix.
6769 // opcode -- Three instruction opcodes may be provided. These are referred
6770 // to within an encode class as $primary, $secondary, and $tertiary
6771 // respectively. The primary opcode is commonly used to
6772 // indicate the type of machine instruction, while secondary
6773 // and tertiary are often used for prefix options or addressing
6774 // modes.
6775 // ins_encode -- A list of encode classes with parameters. The encode class
6776 // name must have been defined in an 'enc_class' specification
6777 // in the encode section of the architecture description.
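//
// As an illustration only (a hypothetical sketch, not an instruction defined
// here), a flag-killing reg-reg integer add would combine the attributes
// above roughly as follows:
//
//   instruct exampleAddI(rRegI dst, rRegI src, rFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     ins_cost(150);
//     format %{ "addl    $dst, $src\t# example int add" %}
//     ins_encode %{
//       __ addl($dst$$Register, $src$$Register);
//     %}
//     ins_pipe(ialu_reg_reg);
//   %}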
6778
6779 // ============================================================================
6780
6781 instruct ShouldNotReachHere() %{
6782 match(Halt);
6783 format %{ "stop\t# ShouldNotReachHere" %}
6784 ins_encode %{
6785 if (is_reachable()) {
6786 const char* str = __ code_string(_halt_reason);
6787 __ stop(str);
6788 }
6789 %}
6790 ins_pipe(pipe_slow);
6791 %}
6792
6793 // ============================================================================
6794
6795 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
6796 // Load Float
6797 instruct MoveF2VL(vlRegF dst, regF src) %{
6798 match(Set dst src);
6799 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6800 ins_encode %{
6801 ShouldNotReachHere();
6802 %}
6803 ins_pipe( fpu_reg_reg );
6804 %}
6805
6806 // Load Float
6807 instruct MoveF2LEG(legRegF dst, regF src) %{
6808 match(Set dst src);
6809 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6810 ins_encode %{
6811 ShouldNotReachHere();
6812 %}
6813 ins_pipe( fpu_reg_reg );
6814 %}
6815
6816 // Load Float
6817 instruct MoveVL2F(regF dst, vlRegF src) %{
6818 match(Set dst src);
6819 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6820 ins_encode %{
6821 ShouldNotReachHere();
6822 %}
6823 ins_pipe( fpu_reg_reg );
6824 %}
6825
6826 // Load Float
6827 instruct MoveLEG2F(regF dst, legRegF src) %{
6828 match(Set dst src);
6829 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6830 ins_encode %{
6831 ShouldNotReachHere();
6832 %}
6833 ins_pipe( fpu_reg_reg );
6834 %}
6835
6836 // Load Double
6837 instruct MoveD2VL(vlRegD dst, regD src) %{
6838 match(Set dst src);
6839 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6840 ins_encode %{
6841 ShouldNotReachHere();
6842 %}
6843 ins_pipe( fpu_reg_reg );
6844 %}
6845
6846 // Load Double
6847 instruct MoveD2LEG(legRegD dst, regD src) %{
6848 match(Set dst src);
6849 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6850 ins_encode %{
6851 ShouldNotReachHere();
6852 %}
6853 ins_pipe( fpu_reg_reg );
6854 %}
6855
6856 // Load Double
6857 instruct MoveVL2D(regD dst, vlRegD src) %{
6858 match(Set dst src);
6859 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6860 ins_encode %{
6861 ShouldNotReachHere();
6862 %}
6863 ins_pipe( fpu_reg_reg );
6864 %}
6865
6866 // Load Double
6867 instruct MoveLEG2D(regD dst, legRegD src) %{
6868 match(Set dst src);
6869 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6870 ins_encode %{
6871 ShouldNotReachHere();
6872 %}
6873 ins_pipe( fpu_reg_reg );
6874 %}
6875
6876 //----------Load/Store/Move Instructions---------------------------------------
6877 //----------Load Instructions--------------------------------------------------
6878
6879 // Load Byte (8 bit signed)
6880 instruct loadB(rRegI dst, memory mem)
6881 %{
6882 match(Set dst (LoadB mem));
6883
6884 ins_cost(125);
6885 format %{ "movsbl $dst, $mem\t# byte" %}
6886
6887 ins_encode %{
6888 __ movsbl($dst$$Register, $mem$$Address);
6889 %}
6890
6891 ins_pipe(ialu_reg_mem);
6892 %}
6893
6894 // Load Byte (8 bit signed) into Long Register
6895 instruct loadB2L(rRegL dst, memory mem)
6896 %{
6897 match(Set dst (ConvI2L (LoadB mem)));
6898
6899 ins_cost(125);
6900 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6901
6902 ins_encode %{
6903 __ movsbq($dst$$Register, $mem$$Address);
6904 %}
6905
6906 ins_pipe(ialu_reg_mem);
6907 %}
6908
6909 // Load Unsigned Byte (8 bit UNsigned)
6910 instruct loadUB(rRegI dst, memory mem)
6911 %{
6912 match(Set dst (LoadUB mem));
6913
6914 ins_cost(125);
6915 format %{ "movzbl $dst, $mem\t# ubyte" %}
6916
6917 ins_encode %{
6918 __ movzbl($dst$$Register, $mem$$Address);
6919 %}
6920
6921 ins_pipe(ialu_reg_mem);
6922 %}
6923
6924 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6925 instruct loadUB2L(rRegL dst, memory mem)
6926 %{
6927 match(Set dst (ConvI2L (LoadUB mem)));
6928
6929 ins_cost(125);
6930 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6931
6932 ins_encode %{
6933 __ movzbq($dst$$Register, $mem$$Address);
6934 %}
6935
6936 ins_pipe(ialu_reg_mem);
6937 %}
6938
6939 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
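// For example, with mask == 0x1FF the zero-extending movzbq is emitted first
// and the mask is then clipped to its low 8 bits (0x1FF & right_n_bits(8) ==
// 0xFF) before the andl, since higher mask bits can never be set in the
// zero-extended byte anyway.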
6940 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6941 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6942 effect(KILL cr);
6943
6944 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
6945 "andl $dst, right_n_bits($mask, 8)" %}
6946 ins_encode %{
6947 Register Rdst = $dst$$Register;
6948 __ movzbq(Rdst, $mem$$Address);
6949 __ andl(Rdst, $mask$$constant & right_n_bits(8));
6950 %}
6951 ins_pipe(ialu_reg_mem);
6952 %}
6953
6954 // Load Short (16 bit signed)
6955 instruct loadS(rRegI dst, memory mem)
6956 %{
6957 match(Set dst (LoadS mem));
6958
6959 ins_cost(125);
6960 format %{ "movswl $dst, $mem\t# short" %}
6961
6962 ins_encode %{
6963 __ movswl($dst$$Register, $mem$$Address);
6964 %}
6965
6966 ins_pipe(ialu_reg_mem);
6967 %}
6968
6969 // Load Short (16 bit signed) to Byte (8 bit signed)
6970 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6971 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6972
6973 ins_cost(125);
6974 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6975 ins_encode %{
6976 __ movsbl($dst$$Register, $mem$$Address);
6977 %}
6978 ins_pipe(ialu_reg_mem);
6979 %}
6980
6981 // Load Short (16 bit signed) into Long Register
6982 instruct loadS2L(rRegL dst, memory mem)
6983 %{
6984 match(Set dst (ConvI2L (LoadS mem)));
6985
6986 ins_cost(125);
6987 format %{ "movswq $dst, $mem\t# short -> long" %}
6988
6989 ins_encode %{
6990 __ movswq($dst$$Register, $mem$$Address);
6991 %}
6992
6993 ins_pipe(ialu_reg_mem);
6994 %}
6995
6996 // Load Unsigned Short/Char (16 bit UNsigned)
6997 instruct loadUS(rRegI dst, memory mem)
6998 %{
6999 match(Set dst (LoadUS mem));
7000
7001 ins_cost(125);
7002 format %{ "movzwl $dst, $mem\t# ushort/char" %}
7003
7004 ins_encode %{
7005 __ movzwl($dst$$Register, $mem$$Address);
7006 %}
7007
7008 ins_pipe(ialu_reg_mem);
7009 %}
7010
7011 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
7012 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7013 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
7014
7015 ins_cost(125);
7016 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
7017 ins_encode %{
7018 __ movsbl($dst$$Register, $mem$$Address);
7019 %}
7020 ins_pipe(ialu_reg_mem);
7021 %}
7022
7023 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
7024 instruct loadUS2L(rRegL dst, memory mem)
7025 %{
7026 match(Set dst (ConvI2L (LoadUS mem)));
7027
7028 ins_cost(125);
7029 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
7030
7031 ins_encode %{
7032 __ movzwq($dst$$Register, $mem$$Address);
7033 %}
7034
7035 ins_pipe(ialu_reg_mem);
7036 %}
7037
7038 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
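// On little-endian x86 the low byte of the 16-bit value sits at the same
// address, so a single zero-extending byte load (movzbq) implements
// (LoadUS & 0xFF) without a separate mask.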
7039 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7040 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7041
7042 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
7043 ins_encode %{
7044 __ movzbq($dst$$Register, $mem$$Address);
7045 %}
7046 ins_pipe(ialu_reg_mem);
7047 %}
7048
7049 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
7050 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7051 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7052 effect(KILL cr);
7053
7054 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
7055 "andl $dst, right_n_bits($mask, 16)" %}
7056 ins_encode %{
7057 Register Rdst = $dst$$Register;
7058 __ movzwq(Rdst, $mem$$Address);
7059 __ andl(Rdst, $mask$$constant & right_n_bits(16));
7060 %}
7061 ins_pipe(ialu_reg_mem);
7062 %}
7063
7064 // Load Integer
7065 instruct loadI(rRegI dst, memory mem)
7066 %{
7067 match(Set dst (LoadI mem));
7068
7069 ins_cost(125);
7070 format %{ "movl $dst, $mem\t# int" %}
7071
7072 ins_encode %{
7073 __ movl($dst$$Register, $mem$$Address);
7074 %}
7075
7076 ins_pipe(ialu_reg_mem);
7077 %}
7078
7079 // Load Integer (32 bit signed) to Byte (8 bit signed)
7080 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7081 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
7082
7083 ins_cost(125);
7084 format %{ "movsbl $dst, $mem\t# int -> byte" %}
7085 ins_encode %{
7086 __ movsbl($dst$$Register, $mem$$Address);
7087 %}
7088 ins_pipe(ialu_reg_mem);
7089 %}
7090
7091 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
7092 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
7093 match(Set dst (AndI (LoadI mem) mask));
7094
7095 ins_cost(125);
7096 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7097 ins_encode %{
7098 __ movzbl($dst$$Register, $mem$$Address);
7099 %}
7100 ins_pipe(ialu_reg_mem);
7101 %}
7102
7103 // Load Integer (32 bit signed) to Short (16 bit signed)
7104 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7105 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7106
7107 ins_cost(125);
7108 format %{ "movswl $dst, $mem\t# int -> short" %}
7109 ins_encode %{
7110 __ movswl($dst$$Register, $mem$$Address);
7111 %}
7112 ins_pipe(ialu_reg_mem);
7113 %}
7114
7115 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
7116 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7117 match(Set dst (AndI (LoadI mem) mask));
7118
7119 ins_cost(125);
7120 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7121 ins_encode %{
7122 __ movzwl($dst$$Register, $mem$$Address);
7123 %}
7124 ins_pipe(ialu_reg_mem);
7125 %}
7126
7127 // Load Integer into Long Register
7128 instruct loadI2L(rRegL dst, memory mem)
7129 %{
7130 match(Set dst (ConvI2L (LoadI mem)));
7131
7132 ins_cost(125);
7133 format %{ "movslq $dst, $mem\t# int -> long" %}
7134
7135 ins_encode %{
7136 __ movslq($dst$$Register, $mem$$Address);
7137 %}
7138
7139 ins_pipe(ialu_reg_mem);
7140 %}
7141
7142 // Load Integer with mask 0xFF into Long Register
7143 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7144 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7145
7146 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7147 ins_encode %{
7148 __ movzbq($dst$$Register, $mem$$Address);
7149 %}
7150 ins_pipe(ialu_reg_mem);
7151 %}
7152
7153 // Load Integer with mask 0xFFFF into Long Register
7154 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7155 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7156
7157 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7158 ins_encode %{
7159 __ movzwq($dst$$Register, $mem$$Address);
7160 %}
7161 ins_pipe(ialu_reg_mem);
7162 %}
7163
7164 // Load Integer with a 31-bit mask into Long Register
7165 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7166 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7167 effect(KILL cr);
7168
7169 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7170 "andl $dst, $mask" %}
7171 ins_encode %{
7172 Register Rdst = $dst$$Register;
7173 __ movl(Rdst, $mem$$Address);
7174 __ andl(Rdst, $mask$$constant);
7175 %}
7176 ins_pipe(ialu_reg_mem);
7177 %}
7178
7179 // Load Unsigned Integer into Long Register
7180 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7181 %{
7182 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7183
7184 ins_cost(125);
7185 format %{ "movl $dst, $mem\t# uint -> long" %}
7186
7187 ins_encode %{
7188 __ movl($dst$$Register, $mem$$Address);
7189 %}
7190
7191 ins_pipe(ialu_reg_mem);
7192 %}
7193
7194 // Load Long
7195 instruct loadL(rRegL dst, memory mem)
7196 %{
7197 match(Set dst (LoadL mem));
7198
7199 ins_cost(125);
7200 format %{ "movq $dst, $mem\t# long" %}
7201
7202 ins_encode %{
7203 __ movq($dst$$Register, $mem$$Address);
7204 %}
7205
7206 ins_pipe(ialu_reg_mem); // XXX
7207 %}
7208
7209 // Load Range
7210 instruct loadRange(rRegI dst, memory mem)
7211 %{
7212 match(Set dst (LoadRange mem));
7213
7214 ins_cost(125); // XXX
7215 format %{ "movl $dst, $mem\t# range" %}
7216 ins_encode %{
7217 __ movl($dst$$Register, $mem$$Address);
7218 %}
7219 ins_pipe(ialu_reg_mem);
7220 %}
7221
7222 // Load Pointer
7223 instruct loadP(rRegP dst, memory mem)
7224 %{
7225 match(Set dst (LoadP mem));
7226 predicate(n->as_Load()->barrier_data() == 0);
7227
7228 ins_cost(125); // XXX
7229 format %{ "movq $dst, $mem\t# ptr" %}
7230 ins_encode %{
7231 __ movq($dst$$Register, $mem$$Address);
7232 %}
7233 ins_pipe(ialu_reg_mem); // XXX
7234 %}
7235
7236 // Load Compressed Pointer
7237 instruct loadN(rRegN dst, memory mem)
7238 %{
7239 predicate(n->as_Load()->barrier_data() == 0);
7240 match(Set dst (LoadN mem));
7241
7242 ins_cost(125); // XXX
7243 format %{ "movl $dst, $mem\t# compressed ptr" %}
7244 ins_encode %{
7245 __ movl($dst$$Register, $mem$$Address);
7246 %}
7247 ins_pipe(ialu_reg_mem); // XXX
7248 %}
7249
7250
7251 // Load Klass Pointer
7252 instruct loadKlass(rRegP dst, memory mem)
7253 %{
7254 match(Set dst (LoadKlass mem));
7255
7256 ins_cost(125); // XXX
7257 format %{ "movq $dst, $mem\t# class" %}
7258 ins_encode %{
7259 __ movq($dst$$Register, $mem$$Address);
7260 %}
7261 ins_pipe(ialu_reg_mem); // XXX
7262 %}
7263
7264 // Load narrow Klass Pointer
7265 instruct loadNKlass(rRegN dst, memory mem)
7266 %{
7267 predicate(!UseCompactObjectHeaders);
7268 match(Set dst (LoadNKlass mem));
7269
7270 ins_cost(125); // XXX
7271 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7272 ins_encode %{
7273 __ movl($dst$$Register, $mem$$Address);
7274 %}
7275 ins_pipe(ialu_reg_mem); // XXX
7276 %}
7277
7278 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7279 %{
7280 predicate(UseCompactObjectHeaders);
7281 match(Set dst (LoadNKlass mem));
7282 effect(KILL cr);
7283 ins_cost(125);
7284 format %{
7285 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7286 "shrl $dst, markWord::klass_shift_at_offset"
7287 %}
7288 ins_encode %{
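    // With APX, eshrl accepts the memory operand directly, folding the load
    // and the shift into a single instruction; without APX an explicit
    // movl + shrl pair is emitted instead.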
7289 if (UseAPX) {
7290 __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
7291 } else {
7293 __ movl($dst$$Register, $mem$$Address);
7294 __ shrl($dst$$Register, markWord::klass_shift_at_offset);
7295 }
7296 %}
7297 ins_pipe(ialu_reg_mem);
7298 %}
7299
7300 // Load Float
7301 instruct loadF(regF dst, memory mem)
7302 %{
7303 match(Set dst (LoadF mem));
7304
7305 ins_cost(145); // XXX
7306 format %{ "movss $dst, $mem\t# float" %}
7307 ins_encode %{
7308 __ movflt($dst$$XMMRegister, $mem$$Address);
7309 %}
7310 ins_pipe(pipe_slow); // XXX
7311 %}
7312
7313 // Load Double
7314 instruct loadD_partial(regD dst, memory mem)
7315 %{
7316 predicate(!UseXmmLoadAndClearUpper);
7317 match(Set dst (LoadD mem));
7318
7319 ins_cost(145); // XXX
7320 format %{ "movlpd $dst, $mem\t# double" %}
7321 ins_encode %{
7322 __ movdbl($dst$$XMMRegister, $mem$$Address);
7323 %}
7324 ins_pipe(pipe_slow); // XXX
7325 %}
7326
7327 instruct loadD(regD dst, memory mem)
7328 %{
7329 predicate(UseXmmLoadAndClearUpper);
7330 match(Set dst (LoadD mem));
7331
7332 ins_cost(145); // XXX
7333 format %{ "movsd $dst, $mem\t# double" %}
7334 ins_encode %{
7335 __ movdbl($dst$$XMMRegister, $mem$$Address);
7336 %}
7337 ins_pipe(pipe_slow); // XXX
7338 %}
7339
7340 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
7341 %{
7342 match(Set dst con);
7343
7344 format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}
7345
7346 ins_encode %{
7347 __ load_aotrc_address($dst$$Register, (address)$con$$constant);
7348 %}
7349
7350 ins_pipe(ialu_reg_fat);
7351 %}
7352
7353 // max = java.lang.Math.max(float a, float b)
7354 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
7355 predicate(VM_Version::supports_avx10_2());
7356 match(Set dst (MaxF a b));
7357 format %{ "maxF $dst, $a, $b" %}
7358 ins_encode %{
7359 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7360 %}
7361 ins_pipe( pipe_slow );
7362 %}
7363
7364 // max = java.lang.Math.max(float a, float b)
7365 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7366 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7367 match(Set dst (MaxF a b));
7368 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7369 format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7370 ins_encode %{
7371 __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7372 %}
7373 ins_pipe( pipe_slow );
7374 %}
7375
7376 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7377 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7378 match(Set dst (MaxF a b));
7379 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7380
7381 format %{ "maxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7382 ins_encode %{
7383 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7384 false /*min*/, true /*single*/);
7385 %}
7386 ins_pipe( pipe_slow );
7387 %}
7388
7389 // max = java.lang.Math.max(double a, double b)
7390 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
7391 predicate(VM_Version::supports_avx10_2());
7392 match(Set dst (MaxD a b));
7393 format %{ "maxD $dst, $a, $b" %}
7394 ins_encode %{
7395 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7396 %}
7397 ins_pipe( pipe_slow );
7398 %}
7399
7400 // max = java.lang.Math.max(double a, double b)
7401 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7402 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7403 match(Set dst (MaxD a b));
7404 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7405 format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7406 ins_encode %{
7407 __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7408 %}
7409 ins_pipe( pipe_slow );
7410 %}
7411
7412 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7413 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7414 match(Set dst (MaxD a b));
7415 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7416
7417 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7418 ins_encode %{
7419 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7420 false /*min*/, false /*single*/);
7421 %}
7422 ins_pipe( pipe_slow );
7423 %}
7424
7425 // min = java.lang.Math.min(float a, float b)
7426 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
7427 predicate(VM_Version::supports_avx10_2());
7428 match(Set dst (MinF a b));
7429 format %{ "minF $dst, $a, $b" %}
7430 ins_encode %{
7431 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7432 %}
7433 ins_pipe( pipe_slow );
7434 %}
7435
7436 // min = java.lang.Math.min(float a, float b)
7437 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7438 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7439 match(Set dst (MinF a b));
7440 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7441 format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7442 ins_encode %{
7443 __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7444 %}
7445 ins_pipe( pipe_slow );
7446 %}
7447
7448 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7449 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7450 match(Set dst (MinF a b));
7451 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7452
7453 format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7454 ins_encode %{
7455 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7456 true /*min*/, true /*single*/);
7457 %}
7458 ins_pipe( pipe_slow );
7459 %}
7460
7461 // min = java.lang.Math.min(double a, double b)
7462 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
7463 predicate(VM_Version::supports_avx10_2());
7464 match(Set dst (MinD a b));
7465 format %{ "minD $dst, $a, $b" %}
7466 ins_encode %{
7467 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7468 %}
7469 ins_pipe( pipe_slow );
7470 %}
7471
7472 // min = java.lang.Math.min(double a, double b)
7473 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7474 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7475 match(Set dst (MinD a b));
7476 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7477 format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7478 ins_encode %{
7479 __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7480 %}
7481 ins_pipe( pipe_slow );
7482 %}
7483
7484 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7485 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7486 match(Set dst (MinD a b));
7487 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7488
7489 format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7490 ins_encode %{
7491 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7492 true /*min*/, false /*single*/);
7493 %}
7494 ins_pipe( pipe_slow );
7495 %}
7496
7497 // Load Effective Address
7498 instruct leaP8(rRegP dst, indOffset8 mem)
7499 %{
7500 match(Set dst mem);
7501
7502 ins_cost(110); // XXX
7503 format %{ "leaq $dst, $mem\t# ptr 8" %}
7504 ins_encode %{
7505 __ leaq($dst$$Register, $mem$$Address);
7506 %}
7507 ins_pipe(ialu_reg_reg_fat);
7508 %}
7509
7510 instruct leaP32(rRegP dst, indOffset32 mem)
7511 %{
7512 match(Set dst mem);
7513
7514 ins_cost(110);
7515 format %{ "leaq $dst, $mem\t# ptr 32" %}
7516 ins_encode %{
7517 __ leaq($dst$$Register, $mem$$Address);
7518 %}
7519 ins_pipe(ialu_reg_reg_fat);
7520 %}
7521
7522 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7523 %{
7524 match(Set dst mem);
7525
7526 ins_cost(110);
7527 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7528 ins_encode %{
7529 __ leaq($dst$$Register, $mem$$Address);
7530 %}
7531 ins_pipe(ialu_reg_reg_fat);
7532 %}
7533
7534 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7535 %{
7536 match(Set dst mem);
7537
7538 ins_cost(110);
7539 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7540 ins_encode %{
7541 __ leaq($dst$$Register, $mem$$Address);
7542 %}
7543 ins_pipe(ialu_reg_reg_fat);
7544 %}
7545
7546 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7547 %{
7548 match(Set dst mem);
7549
7550 ins_cost(110);
7551 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7552 ins_encode %{
7553 __ leaq($dst$$Register, $mem$$Address);
7554 %}
7555 ins_pipe(ialu_reg_reg_fat);
7556 %}
7557
7558 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7559 %{
7560 match(Set dst mem);
7561
7562 ins_cost(110);
7563 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7564 ins_encode %{
7565 __ leaq($dst$$Register, $mem$$Address);
7566 %}
7567 ins_pipe(ialu_reg_reg_fat);
7568 %}
7569
7570 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7571 %{
7572 match(Set dst mem);
7573
7574 ins_cost(110);
7575 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7576 ins_encode %{
7577 __ leaq($dst$$Register, $mem$$Address);
7578 %}
7579 ins_pipe(ialu_reg_reg_fat);
7580 %}
7581
7582 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7583 %{
7584 match(Set dst mem);
7585
7586 ins_cost(110);
7587 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7588 ins_encode %{
7589 __ leaq($dst$$Register, $mem$$Address);
7590 %}
7591 ins_pipe(ialu_reg_reg_fat);
7592 %}
7593
7594 // Load Effective Address which uses a narrow (32-bit) oop
7595 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7596 %{
7597 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7598 match(Set dst mem);
7599
7600 ins_cost(110);
7601 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7602 ins_encode %{
7603 __ leaq($dst$$Register, $mem$$Address);
7604 %}
7605 ins_pipe(ialu_reg_reg_fat);
7606 %}
7607
7608 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7609 %{
7610 predicate(CompressedOops::shift() == 0);
7611 match(Set dst mem);
7612
7613 ins_cost(110); // XXX
7614 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7615 ins_encode %{
7616 __ leaq($dst$$Register, $mem$$Address);
7617 %}
7618 ins_pipe(ialu_reg_reg_fat);
7619 %}
7620
7621 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7622 %{
7623 predicate(CompressedOops::shift() == 0);
7624 match(Set dst mem);
7625
7626 ins_cost(110);
7627 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7628 ins_encode %{
7629 __ leaq($dst$$Register, $mem$$Address);
7630 %}
7631 ins_pipe(ialu_reg_reg_fat);
7632 %}
7633
7634 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7635 %{
7636 predicate(CompressedOops::shift() == 0);
7637 match(Set dst mem);
7638
7639 ins_cost(110);
7640 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7641 ins_encode %{
7642 __ leaq($dst$$Register, $mem$$Address);
7643 %}
7644 ins_pipe(ialu_reg_reg_fat);
7645 %}
7646
7647 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7648 %{
7649 predicate(CompressedOops::shift() == 0);
7650 match(Set dst mem);
7651
7652 ins_cost(110);
7653 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7654 ins_encode %{
7655 __ leaq($dst$$Register, $mem$$Address);
7656 %}
7657 ins_pipe(ialu_reg_reg_fat);
7658 %}
7659
7660 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7661 %{
7662 predicate(CompressedOops::shift() == 0);
7663 match(Set dst mem);
7664
7665 ins_cost(110);
7666 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7667 ins_encode %{
7668 __ leaq($dst$$Register, $mem$$Address);
7669 %}
7670 ins_pipe(ialu_reg_reg_fat);
7671 %}
7672
7673 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7674 %{
7675 predicate(CompressedOops::shift() == 0);
7676 match(Set dst mem);
7677
7678 ins_cost(110);
7679 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7680 ins_encode %{
7681 __ leaq($dst$$Register, $mem$$Address);
7682 %}
7683 ins_pipe(ialu_reg_reg_fat);
7684 %}
7685
7686 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7687 %{
7688 predicate(CompressedOops::shift() == 0);
7689 match(Set dst mem);
7690
7691 ins_cost(110);
7692 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7693 ins_encode %{
7694 __ leaq($dst$$Register, $mem$$Address);
7695 %}
7696 ins_pipe(ialu_reg_reg_fat);
7697 %}
7698
7699 instruct loadConI(rRegI dst, immI src)
7700 %{
7701 match(Set dst src);
7702
7703 format %{ "movl $dst, $src\t# int" %}
7704 ins_encode %{
7705 __ movl($dst$$Register, $src$$constant);
7706 %}
7707 ins_pipe(ialu_reg_fat); // XXX
7708 %}
7709
7710 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7711 %{
7712 match(Set dst src);
7713 effect(KILL cr);
7714
7715 ins_cost(50);
7716 format %{ "xorl $dst, $dst\t# int" %}
7717 ins_encode %{
7718 __ xorl($dst$$Register, $dst$$Register);
7719 %}
7720 ins_pipe(ialu_reg);
7721 %}
7722
7723 instruct loadConL(rRegL dst, immL src)
7724 %{
7725 match(Set dst src);
7726
7727 ins_cost(150);
7728 format %{ "movq $dst, $src\t# long" %}
7729 ins_encode %{
7730 __ mov64($dst$$Register, $src$$constant);
7731 %}
7732 ins_pipe(ialu_reg);
7733 %}
7734
7735 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7736 %{
7737 match(Set dst src);
7738 effect(KILL cr);
7739
7740 ins_cost(50);
7741 format %{ "xorl $dst, $dst\t# long" %}
7742 ins_encode %{
7743 __ xorl($dst$$Register, $dst$$Register);
7744 %}
7745 ins_pipe(ialu_reg); // XXX
7746 %}
7747
7748 instruct loadConUL32(rRegL dst, immUL32 src)
7749 %{
7750 match(Set dst src);
7751
7752 ins_cost(60);
7753 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7754 ins_encode %{
7755 __ movl($dst$$Register, $src$$constant);
7756 %}
7757 ins_pipe(ialu_reg);
7758 %}
7759
7760 instruct loadConL32(rRegL dst, immL32 src)
7761 %{
7762 match(Set dst src);
7763
7764 ins_cost(70);
7765 format %{ "movq $dst, $src\t# long (32-bit)" %}
7766 ins_encode %{
7767 __ movq($dst$$Register, $src$$constant);
7768 %}
7769 ins_pipe(ialu_reg);
7770 %}
7771
7772 instruct loadConP(rRegP dst, immP con) %{
7773 match(Set dst con);
7774
7775 format %{ "movq $dst, $con\t# ptr" %}
7776 ins_encode %{
7777 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7778 %}
7779 ins_pipe(ialu_reg_fat); // XXX
7780 %}
7781
7782 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7783 %{
7784 match(Set dst src);
7785 effect(KILL cr);
7786
7787 ins_cost(50);
7788 format %{ "xorl $dst, $dst\t# ptr" %}
7789 ins_encode %{
7790 __ xorl($dst$$Register, $dst$$Register);
7791 %}
7792 ins_pipe(ialu_reg);
7793 %}
7794
7795 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7796 %{
7797 match(Set dst src);
7798 effect(KILL cr);
7799
7800 ins_cost(60);
7801 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7802 ins_encode %{
7803 __ movl($dst$$Register, $src$$constant);
7804 %}
7805 ins_pipe(ialu_reg);
7806 %}
7807
7808 instruct loadConF(regF dst, immF con) %{
7809 match(Set dst con);
7810 ins_cost(125);
7811 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7812 ins_encode %{
7813 __ movflt($dst$$XMMRegister, $constantaddress($con));
7814 %}
7815 ins_pipe(pipe_slow);
7816 %}
7817
7818 instruct loadConH(regF dst, immH con) %{
7819 match(Set dst con);
7820 ins_cost(125);
7821 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7822 ins_encode %{
7823 __ movflt($dst$$XMMRegister, $constantaddress($con));
7824 %}
7825 ins_pipe(pipe_slow);
7826 %}
7827
7828 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7829 match(Set dst src);
7830 effect(KILL cr);
7831 format %{ "xorq $dst, $dst\t# compressed null pointer" %}
7832 ins_encode %{
7833 __ xorq($dst$$Register, $dst$$Register);
7834 %}
7835 ins_pipe(ialu_reg);
7836 %}
7837
7838 instruct loadConN(rRegN dst, immN src) %{
7839 match(Set dst src);
7840
7841 ins_cost(125);
7842 format %{ "movl $dst, $src\t# compressed ptr" %}
7843 ins_encode %{
7844 address con = (address)$src$$constant;
7845 if (con == nullptr) {
7846 ShouldNotReachHere();
7847 } else {
7848 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7849 }
7850 %}
7851 ins_pipe(ialu_reg_fat); // XXX
7852 %}
7853
7854 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7855 match(Set dst src);
7856
7857 ins_cost(125);
7858 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7859 ins_encode %{
7860 address con = (address)$src$$constant;
7861 if (con == nullptr) {
7862 ShouldNotReachHere();
7863 } else {
7864 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7865 }
7866 %}
7867 ins_pipe(ialu_reg_fat); // XXX
7868 %}
7869
7870 instruct loadConF0(regF dst, immF0 src)
7871 %{
7872 match(Set dst src);
7873 ins_cost(100);
7874
7875 format %{ "xorps $dst, $dst\t# float 0.0" %}
7876 ins_encode %{
7877 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7878 %}
7879 ins_pipe(pipe_slow);
7880 %}
7881
7882 // Use the same format since predicate() cannot be used here.
7883 instruct loadConD(regD dst, immD con) %{
7884 match(Set dst con);
7885 ins_cost(125);
7886 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7887 ins_encode %{
7888 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7889 %}
7890 ins_pipe(pipe_slow);
7891 %}
7892
7893 instruct loadConD0(regD dst, immD0 src)
7894 %{
7895 match(Set dst src);
7896 ins_cost(100);
7897
7898 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7899 ins_encode %{
7900 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7901 %}
7902 ins_pipe(pipe_slow);
7903 %}
7904
7905 instruct loadSSI(rRegI dst, stackSlotI src)
7906 %{
7907 match(Set dst src);
7908
7909 ins_cost(125);
7910 format %{ "movl $dst, $src\t# int stk" %}
7911 ins_encode %{
7912 __ movl($dst$$Register, $src$$Address);
7913 %}
7914 ins_pipe(ialu_reg_mem);
7915 %}
7916
7917 instruct loadSSL(rRegL dst, stackSlotL src)
7918 %{
7919 match(Set dst src);
7920
7921 ins_cost(125);
7922 format %{ "movq $dst, $src\t# long stk" %}
7923 ins_encode %{
7924 __ movq($dst$$Register, $src$$Address);
7925 %}
7926 ins_pipe(ialu_reg_mem);
7927 %}
7928
7929 instruct loadSSP(rRegP dst, stackSlotP src)
7930 %{
7931 match(Set dst src);
7932
7933 ins_cost(125);
7934 format %{ "movq $dst, $src\t# ptr stk" %}
7935 ins_encode %{
7936 __ movq($dst$$Register, $src$$Address);
7937 %}
7938 ins_pipe(ialu_reg_mem);
7939 %}
7940
7941 instruct loadSSF(regF dst, stackSlotF src)
7942 %{
7943 match(Set dst src);
7944
7945 ins_cost(125);
7946 format %{ "movss $dst, $src\t# float stk" %}
7947 ins_encode %{
7948 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
7949 %}
7950 ins_pipe(pipe_slow); // XXX
7951 %}
7952
7953 // Use the same format since predicate() cannot be used here.
7954 instruct loadSSD(regD dst, stackSlotD src)
7955 %{
7956 match(Set dst src);
7957
7958 ins_cost(125);
7959 format %{ "movsd $dst, $src\t# double stk" %}
7960 ins_encode %{
7961 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7962 %}
7963 ins_pipe(pipe_slow); // XXX
7964 %}
7965
7966 // Prefetch instructions for allocation.
7967 // Must be safe to execute with invalid address (cannot fault).
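// (The x86 PREFETCH* instructions are hints: an unmapped or invalid address is
// simply ignored rather than raising a fault, which is what makes prefetching
// ahead of the allocation pointer safe.)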
7968
7969 instruct prefetchAlloc( memory mem ) %{
7970 predicate(AllocatePrefetchInstr==3);
7971 match(PrefetchAllocation mem);
7972 ins_cost(125);
7973
7974 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
7975 ins_encode %{
7976 __ prefetchw($mem$$Address);
7977 %}
7978 ins_pipe(ialu_mem);
7979 %}
7980
7981 instruct prefetchAllocNTA( memory mem ) %{
7982 predicate(AllocatePrefetchInstr==0);
7983 match(PrefetchAllocation mem);
7984 ins_cost(125);
7985
7986 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
7987 ins_encode %{
7988 __ prefetchnta($mem$$Address);
7989 %}
7990 ins_pipe(ialu_mem);
7991 %}
7992
7993 instruct prefetchAllocT0( memory mem ) %{
7994 predicate(AllocatePrefetchInstr==1);
7995 match(PrefetchAllocation mem);
7996 ins_cost(125);
7997
7998 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
7999 ins_encode %{
8000 __ prefetcht0($mem$$Address);
8001 %}
8002 ins_pipe(ialu_mem);
8003 %}
8004
8005 instruct prefetchAllocT2( memory mem ) %{
8006 predicate(AllocatePrefetchInstr==2);
8007 match(PrefetchAllocation mem);
8008 ins_cost(125);
8009
8010 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
8011 ins_encode %{
8012 __ prefetcht2($mem$$Address);
8013 %}
8014 ins_pipe(ialu_mem);
8015 %}
8016
8017 //----------Store Instructions-------------------------------------------------
8018
8019 // Store Byte
8020 instruct storeB(memory mem, rRegI src)
8021 %{
8022 match(Set mem (StoreB mem src));
8023
8024 ins_cost(125); // XXX
8025 format %{ "movb $mem, $src\t# byte" %}
8026 ins_encode %{
8027 __ movb($mem$$Address, $src$$Register);
8028 %}
8029 ins_pipe(ialu_mem_reg);
8030 %}
8031
8032 // Store Char/Short
8033 instruct storeC(memory mem, rRegI src)
8034 %{
8035 match(Set mem (StoreC mem src));
8036
8037 ins_cost(125); // XXX
8038 format %{ "movw $mem, $src\t# char/short" %}
8039 ins_encode %{
8040 __ movw($mem$$Address, $src$$Register);
8041 %}
8042 ins_pipe(ialu_mem_reg);
8043 %}
8044
8045 // Store Integer
8046 instruct storeI(memory mem, rRegI src)
8047 %{
8048 match(Set mem (StoreI mem src));
8049
8050 ins_cost(125); // XXX
8051 format %{ "movl $mem, $src\t# int" %}
8052 ins_encode %{
8053 __ movl($mem$$Address, $src$$Register);
8054 %}
8055 ins_pipe(ialu_mem_reg);
8056 %}
8057
8058 // Store Long
8059 instruct storeL(memory mem, rRegL src)
8060 %{
8061 match(Set mem (StoreL mem src));
8062
8063 ins_cost(125); // XXX
8064 format %{ "movq $mem, $src\t# long" %}
8065 ins_encode %{
8066 __ movq($mem$$Address, $src$$Register);
8067 %}
8068 ins_pipe(ialu_mem_reg); // XXX
8069 %}
8070
8071 // Store Pointer
8072 instruct storeP(memory mem, any_RegP src)
8073 %{
8074 predicate(n->as_Store()->barrier_data() == 0);
8075 match(Set mem (StoreP mem src));
8076
8077 ins_cost(125); // XXX
8078 format %{ "movq $mem, $src\t# ptr" %}
8079 ins_encode %{
8080 __ movq($mem$$Address, $src$$Register);
8081 %}
8082 ins_pipe(ialu_mem_reg);
8083 %}
8084
8085 instruct storeImmP0(memory mem, immP0 zero)
8086 %{
8087 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
8088 match(Set mem (StoreP mem zero));
8089
8090 ins_cost(125); // XXX
8091 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
8092 ins_encode %{
8093 __ movq($mem$$Address, r12);
8094 %}
8095 ins_pipe(ialu_mem_reg);
8096 %}
8097
8098 // Store Null Pointer, mark word, or other simple pointer constant.
8099 instruct storeImmP(memory mem, immP31 src)
8100 %{
8101 predicate(n->as_Store()->barrier_data() == 0);
8102 match(Set mem (StoreP mem src));
8103
8104 ins_cost(150); // XXX
8105 format %{ "movq $mem, $src\t# ptr" %}
8106 ins_encode %{
8107 __ movq($mem$$Address, $src$$constant);
8108 %}
8109 ins_pipe(ialu_mem_imm);
8110 %}
8111
8112 // Store Compressed Pointer
8113 instruct storeN(memory mem, rRegN src)
8114 %{
8115 predicate(n->as_Store()->barrier_data() == 0);
8116 match(Set mem (StoreN mem src));
8117
8118 ins_cost(125); // XXX
8119 format %{ "movl $mem, $src\t# compressed ptr" %}
8120 ins_encode %{
8121 __ movl($mem$$Address, $src$$Register);
8122 %}
8123 ins_pipe(ialu_mem_reg);
8124 %}
8125
8126 instruct storeNKlass(memory mem, rRegN src)
8127 %{
8128 match(Set mem (StoreNKlass mem src));
8129
8130 ins_cost(125); // XXX
8131 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8132 ins_encode %{
8133 __ movl($mem$$Address, $src$$Register);
8134 %}
8135 ins_pipe(ialu_mem_reg);
8136 %}
8137
8138 instruct storeImmN0(memory mem, immN0 zero)
8139 %{
8140 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8141 match(Set mem (StoreN mem zero));
8142
8143 ins_cost(125); // XXX
8144 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8145 ins_encode %{
8146 __ movl($mem$$Address, r12);
8147 %}
8148 ins_pipe(ialu_mem_reg);
8149 %}
8150
8151 instruct storeImmN(memory mem, immN src)
8152 %{
8153 predicate(n->as_Store()->barrier_data() == 0);
8154 match(Set mem (StoreN mem src));
8155
8156 ins_cost(150); // XXX
8157 format %{ "movl $mem, $src\t# compressed ptr" %}
8158 ins_encode %{
8159 address con = (address)$src$$constant;
8160 if (con == nullptr) {
8161 __ movl($mem$$Address, 0);
8162 } else {
8163 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8164 }
8165 %}
8166 ins_pipe(ialu_mem_imm);
8167 %}
8168
8169 instruct storeImmNKlass(memory mem, immNKlass src)
8170 %{
8171 match(Set mem (StoreNKlass mem src));
8172
8173 ins_cost(150); // XXX
8174 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8175 ins_encode %{
8176 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8177 %}
8178 ins_pipe(ialu_mem_imm);
8179 %}
8180
8181 // Store Integer Immediate
8182 instruct storeImmI0(memory mem, immI_0 zero)
8183 %{
8184 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8185 match(Set mem (StoreI mem zero));
8186
8187 ins_cost(125); // XXX
8188 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8189 ins_encode %{
8190 __ movl($mem$$Address, r12);
8191 %}
8192 ins_pipe(ialu_mem_reg);
8193 %}
8194
8195 instruct storeImmI(memory mem, immI src)
8196 %{
8197 match(Set mem (StoreI mem src));
8198
8199 ins_cost(150);
8200 format %{ "movl $mem, $src\t# int" %}
8201 ins_encode %{
8202 __ movl($mem$$Address, $src$$constant);
8203 %}
8204 ins_pipe(ialu_mem_imm);
8205 %}
8206
8207 // Store Long Immediate
8208 instruct storeImmL0(memory mem, immL0 zero)
8209 %{
8210 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8211 match(Set mem (StoreL mem zero));
8212
8213 ins_cost(125); // XXX
8214 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8215 ins_encode %{
8216 __ movq($mem$$Address, r12);
8217 %}
8218 ins_pipe(ialu_mem_reg);
8219 %}
8220
8221 instruct storeImmL(memory mem, immL32 src)
8222 %{
8223 match(Set mem (StoreL mem src));
8224
8225 ins_cost(150);
8226 format %{ "movq $mem, $src\t# long" %}
8227 ins_encode %{
8228 __ movq($mem$$Address, $src$$constant);
8229 %}
8230 ins_pipe(ialu_mem_imm);
8231 %}
8232
8233 // Store Short/Char Immediate
8234 instruct storeImmC0(memory mem, immI_0 zero)
8235 %{
8236 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8237 match(Set mem (StoreC mem zero));
8238
8239 ins_cost(125); // XXX
8240 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8241 ins_encode %{
8242 __ movw($mem$$Address, r12);
8243 %}
8244 ins_pipe(ialu_mem_reg);
8245 %}
8246
8247 instruct storeImmI16(memory mem, immI16 src)
8248 %{
8249 predicate(UseStoreImmI16);
8250 match(Set mem (StoreC mem src));
8251
8252 ins_cost(150);
8253 format %{ "movw $mem, $src\t# short/char" %}
8254 ins_encode %{
8255 __ movw($mem$$Address, $src$$constant);
8256 %}
8257 ins_pipe(ialu_mem_imm);
8258 %}
8259
8260 // Store Byte Immediate
8261 instruct storeImmB0(memory mem, immI_0 zero)
8262 %{
8263 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8264 match(Set mem (StoreB mem zero));
8265
8266 ins_cost(125); // XXX
8267 format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
8268 ins_encode %{
8269 __ movb($mem$$Address, r12);
8270 %}
8271 ins_pipe(ialu_mem_reg);
8272 %}
8273
8274 instruct storeImmB(memory mem, immI8 src)
8275 %{
8276 match(Set mem (StoreB mem src));
8277
8278 ins_cost(150); // XXX
8279 format %{ "movb $mem, $src\t# byte" %}
8280 ins_encode %{
8281 __ movb($mem$$Address, $src$$constant);
8282 %}
8283 ins_pipe(ialu_mem_imm);
8284 %}
8285
8286 // Store Float
8287 instruct storeF(memory mem, regF src)
8288 %{
8289 match(Set mem (StoreF mem src));
8290
8291 ins_cost(95); // XXX
8292 format %{ "movss $mem, $src\t# float" %}
8293 ins_encode %{
8294 __ movflt($mem$$Address, $src$$XMMRegister);
8295 %}
8296 ins_pipe(pipe_slow); // XXX
8297 %}
8298
8299 // Store immediate Float value (it is faster than store from XMM register)
8300 instruct storeF0(memory mem, immF0 zero)
8301 %{
8302 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8303 match(Set mem (StoreF mem zero));
8304
8305 ins_cost(25); // XXX
8306 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8307 ins_encode %{
8308 __ movl($mem$$Address, r12);
8309 %}
8310 ins_pipe(ialu_mem_reg);
8311 %}
8312
8313 instruct storeF_imm(memory mem, immF src)
8314 %{
8315 match(Set mem (StoreF mem src));
8316
8317 ins_cost(50);
8318 format %{ "movl $mem, $src\t# float" %}
8319 ins_encode %{
8320 __ movl($mem$$Address, jint_cast($src$$constant));
8321 %}
8322 ins_pipe(ialu_mem_imm);
8323 %}
8324
8325 // Store Double
8326 instruct storeD(memory mem, regD src)
8327 %{
8328 match(Set mem (StoreD mem src));
8329
8330 ins_cost(95); // XXX
8331 format %{ "movsd $mem, $src\t# double" %}
8332 ins_encode %{
8333 __ movdbl($mem$$Address, $src$$XMMRegister);
8334 %}
8335 ins_pipe(pipe_slow); // XXX
8336 %}
8337
8338 // Store immediate double 0.0 (it is faster than store from XMM register)
8339 instruct storeD0_imm(memory mem, immD0 src)
8340 %{
8341 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8342 match(Set mem (StoreD mem src));
8343
8344 ins_cost(50);
8345 format %{ "movq $mem, $src\t# double 0." %}
8346 ins_encode %{
8347 __ movq($mem$$Address, $src$$constant);
8348 %}
8349 ins_pipe(ialu_mem_imm);
8350 %}
8351
8352 instruct storeD0(memory mem, immD0 zero)
8353 %{
8354 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8355 match(Set mem (StoreD mem zero));
8356
8357 ins_cost(25); // XXX
8358 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8359 ins_encode %{
8360 __ movq($mem$$Address, r12);
8361 %}
8362 ins_pipe(ialu_mem_reg);
8363 %}
8364
8365 instruct storeSSI(stackSlotI dst, rRegI src)
8366 %{
8367 match(Set dst src);
8368
8369 ins_cost(100);
8370 format %{ "movl $dst, $src\t# int stk" %}
8371 ins_encode %{
8372 __ movl($dst$$Address, $src$$Register);
8373 %}
8374 ins_pipe( ialu_mem_reg );
8375 %}
8376
8377 instruct storeSSL(stackSlotL dst, rRegL src)
8378 %{
8379 match(Set dst src);
8380
8381 ins_cost(100);
8382 format %{ "movq $dst, $src\t# long stk" %}
8383 ins_encode %{
8384 __ movq($dst$$Address, $src$$Register);
8385 %}
8386 ins_pipe(ialu_mem_reg);
8387 %}
8388
8389 instruct storeSSP(stackSlotP dst, rRegP src)
8390 %{
8391 match(Set dst src);
8392
8393 ins_cost(100);
8394 format %{ "movq $dst, $src\t# ptr stk" %}
8395 ins_encode %{
8396 __ movq($dst$$Address, $src$$Register);
8397 %}
8398 ins_pipe(ialu_mem_reg);
8399 %}
8400
8401 instruct storeSSF(stackSlotF dst, regF src)
8402 %{
8403 match(Set dst src);
8404
8405 ins_cost(95); // XXX
8406 format %{ "movss $dst, $src\t# float stk" %}
8407 ins_encode %{
8408 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8409 %}
8410 ins_pipe(pipe_slow); // XXX
8411 %}
8412
8413 instruct storeSSD(stackSlotD dst, regD src)
8414 %{
8415 match(Set dst src);
8416
8417 ins_cost(95); // XXX
8418 format %{ "movsd $dst, $src\t# double stk" %}
8419 ins_encode %{
8420 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8421 %}
8422 ins_pipe(pipe_slow); // XXX
8423 %}
8424
8425 instruct cacheWB(indirect addr)
8426 %{
8427 predicate(VM_Version::supports_data_cache_line_flush());
8428 match(CacheWB addr);
8429
8430 ins_cost(100);
8431 format %{"cache wb $addr" %}
8432 ins_encode %{
8433 assert($addr->index_position() < 0, "should be");
8434 assert($addr$$disp == 0, "should be");
8435 __ cache_wb(Address($addr$$base$$Register, 0));
8436 %}
8437 ins_pipe(pipe_slow); // XXX
8438 %}
8439
8440 instruct cacheWBPreSync()
8441 %{
8442 predicate(VM_Version::supports_data_cache_line_flush());
8443 match(CacheWBPreSync);
8444
8445 ins_cost(100);
8446 format %{"cache wb presync" %}
8447 ins_encode %{
8448 __ cache_wbsync(true);
8449 %}
8450 ins_pipe(pipe_slow); // XXX
8451 %}
8452
8453 instruct cacheWBPostSync()
8454 %{
8455 predicate(VM_Version::supports_data_cache_line_flush());
8456 match(CacheWBPostSync);
8457
8458 ins_cost(100);
8459 format %{"cache wb postsync" %}
8460 ins_encode %{
8461 __ cache_wbsync(false);
8462 %}
8463 ins_pipe(pipe_slow); // XXX
8464 %}
8465
8466 //----------BSWAP Instructions-------------------------------------------------
8467 instruct bytes_reverse_int(rRegI dst) %{
8468 match(Set dst (ReverseBytesI dst));
8469
8470 format %{ "bswapl $dst" %}
8471 ins_encode %{
8472 __ bswapl($dst$$Register);
8473 %}
8474 ins_pipe( ialu_reg );
8475 %}
8476
8477 instruct bytes_reverse_long(rRegL dst) %{
8478 match(Set dst (ReverseBytesL dst));
8479
8480 format %{ "bswapq $dst" %}
8481 ins_encode %{
8482 __ bswapq($dst$$Register);
8483 %}
8484 ins_pipe( ialu_reg);
8485 %}
8486
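// bswapl reverses all four bytes, leaving the original low 16 bits in the
// upper half of the register; the logical shift right by 16 moves them back
// down zero-extended, yielding the byte-swapped unsigned short.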
8487 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8488 match(Set dst (ReverseBytesUS dst));
8489 effect(KILL cr);
8490
8491 format %{ "bswapl $dst\n\t"
8492 "shrl $dst,16\n\t" %}
8493 ins_encode %{
8494 __ bswapl($dst$$Register);
8495 __ shrl($dst$$Register, 16);
8496 %}
8497 ins_pipe( ialu_reg );
8498 %}
8499
8500 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8501 match(Set dst (ReverseBytesS dst));
8502 effect(KILL cr);
8503
8504 format %{ "bswapl $dst\n\t"
8505 "sarl $dst,16\n\t" %}
8506 ins_encode %{
8507 __ bswapl($dst$$Register);
8508 __ sarl($dst$$Register, 16);
8509 %}
8510 ins_pipe( ialu_reg );
8511 %}
8512
8513 //---------- Zeros Count Instructions ------------------------------------------
8514
8515 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8516 predicate(UseCountLeadingZerosInstruction);
8517 match(Set dst (CountLeadingZerosI src));
8518 effect(KILL cr);
8519
8520 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8521 ins_encode %{
8522 __ lzcntl($dst$$Register, $src$$Register);
8523 %}
8524 ins_pipe(ialu_reg);
8525 %}
8526
8527 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8528 predicate(UseCountLeadingZerosInstruction);
8529 match(Set dst (CountLeadingZerosI (LoadI src)));
8530 effect(KILL cr);
8531 ins_cost(175);
8532 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8533 ins_encode %{
8534 __ lzcntl($dst$$Register, $src$$Address);
8535 %}
8536 ins_pipe(ialu_reg_mem);
8537 %}
8538
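// Fallback when lzcnt is unavailable: bsrl returns the index of the highest
// set bit, so the leading-zero count is 31 - index (negl then addl 31).
// When the source is zero, bsr leaves the destination undefined with ZF set,
// so dst is forced to -1 first, making the final result 31 - (-1) == 32.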
8539 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8540 predicate(!UseCountLeadingZerosInstruction);
8541 match(Set dst (CountLeadingZerosI src));
8542 effect(KILL cr);
8543
8544 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8545 "jnz skip\n\t"
8546 "movl $dst, -1\n"
8547 "skip:\n\t"
8548 "negl $dst\n\t"
8549 "addl $dst, 31" %}
8550 ins_encode %{
8551 Register Rdst = $dst$$Register;
8552 Register Rsrc = $src$$Register;
8553 Label skip;
8554 __ bsrl(Rdst, Rsrc);
8555 __ jccb(Assembler::notZero, skip);
8556 __ movl(Rdst, -1);
8557 __ bind(skip);
8558 __ negl(Rdst);
8559 __ addl(Rdst, BitsPerInt - 1);
8560 %}
8561 ins_pipe(ialu_reg);
8562 %}
8563
8564 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8565 predicate(UseCountLeadingZerosInstruction);
8566 match(Set dst (CountLeadingZerosL src));
8567 effect(KILL cr);
8568
8569 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8570 ins_encode %{
8571 __ lzcntq($dst$$Register, $src$$Register);
8572 %}
8573 ins_pipe(ialu_reg);
8574 %}
8575
8576 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8577 predicate(UseCountLeadingZerosInstruction);
8578 match(Set dst (CountLeadingZerosL (LoadL src)));
8579 effect(KILL cr);
8580 ins_cost(175);
8581 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8582 ins_encode %{
8583 __ lzcntq($dst$$Register, $src$$Address);
8584 %}
8585 ins_pipe(ialu_reg_mem);
8586 %}
8587
8588 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8589 predicate(!UseCountLeadingZerosInstruction);
8590 match(Set dst (CountLeadingZerosL src));
8591 effect(KILL cr);
8592
8593 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8594 "jnz skip\n\t"
8595 "movl $dst, -1\n"
8596 "skip:\n\t"
8597 "negl $dst\n\t"
8598 "addl $dst, 63" %}
8599 ins_encode %{
8600 Register Rdst = $dst$$Register;
8601 Register Rsrc = $src$$Register;
8602 Label skip;
8603 __ bsrq(Rdst, Rsrc);
8604 __ jccb(Assembler::notZero, skip);
8605 __ movl(Rdst, -1);
8606 __ bind(skip);
8607 __ negl(Rdst);
8608 __ addl(Rdst, BitsPerLong - 1);
8609 %}
8610 ins_pipe(ialu_reg);
8611 %}
8612
8613 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8614 predicate(UseCountTrailingZerosInstruction);
8615 match(Set dst (CountTrailingZerosI src));
8616 effect(KILL cr);
8617
8618 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8619 ins_encode %{
8620 __ tzcntl($dst$$Register, $src$$Register);
8621 %}
8622 ins_pipe(ialu_reg);
8623 %}
8624
8625 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8626 predicate(UseCountTrailingZerosInstruction);
8627 match(Set dst (CountTrailingZerosI (LoadI src)));
8628 effect(KILL cr);
8629 ins_cost(175);
8630 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8631 ins_encode %{
8632 __ tzcntl($dst$$Register, $src$$Address);
8633 %}
8634 ins_pipe(ialu_reg_mem);
8635 %}
8636
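// bsf fallback: bsf returns the index of the lowest set bit, which is already
// the trailing-zero count; only the x == 0 case (ZF set, destination
// undefined) needs patching to 32 (64 in the long variant).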
8637 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8638 predicate(!UseCountTrailingZerosInstruction);
8639 match(Set dst (CountTrailingZerosI src));
8640 effect(KILL cr);
8641
8642 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8643 "jnz done\n\t"
8644 "movl $dst, 32\n"
8645 "done:" %}
8646 ins_encode %{
8647 Register Rdst = $dst$$Register;
8648 Label done;
8649 __ bsfl(Rdst, $src$$Register);
8650 __ jccb(Assembler::notZero, done);
8651 __ movl(Rdst, BitsPerInt);
8652 __ bind(done);
8653 %}
8654 ins_pipe(ialu_reg);
8655 %}
8656
8657 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8658 predicate(UseCountTrailingZerosInstruction);
8659 match(Set dst (CountTrailingZerosL src));
8660 effect(KILL cr);
8661
8662 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8663 ins_encode %{
8664 __ tzcntq($dst$$Register, $src$$Register);
8665 %}
8666 ins_pipe(ialu_reg);
8667 %}
8668
8669 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8670 predicate(UseCountTrailingZerosInstruction);
8671 match(Set dst (CountTrailingZerosL (LoadL src)));
8672 effect(KILL cr);
8673 ins_cost(175);
8674 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8675 ins_encode %{
8676 __ tzcntq($dst$$Register, $src$$Address);
8677 %}
8678 ins_pipe(ialu_reg_mem);
8679 %}
8680
8681 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8682 predicate(!UseCountTrailingZerosInstruction);
8683 match(Set dst (CountTrailingZerosL src));
8684 effect(KILL cr);
8685
8686 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8687 "jnz done\n\t"
8688 "movl $dst, 64\n"
8689 "done:" %}
8690 ins_encode %{
8691 Register Rdst = $dst$$Register;
8692 Label done;
8693 __ bsfq(Rdst, $src$$Register);
8694 __ jccb(Assembler::notZero, done);
8695 __ movl(Rdst, BitsPerLong);
8696 __ bind(done);
8697 %}
8698 ins_pipe(ialu_reg);
8699 %}
8700
8701 //--------------- Reverse Operation Instructions ----------------
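// ReverseI/ReverseL reverse the bit order of an int/long. When GFNI is
// available the value is routed through XMM temporaries so the macro
// assembler can reverse the bits of each byte with GF2P8AFFINEQB; otherwise
// only GPR temporaries are supplied and a scalar shift-and-mask sequence is
// used (see MacroAssembler::reverseI/reverseL for the details).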
8702 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8703 predicate(!VM_Version::supports_gfni());
8704 match(Set dst (ReverseI src));
8705 effect(TEMP dst, TEMP rtmp, KILL cr);
8706 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8707 ins_encode %{
8708 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8709 %}
8710 ins_pipe( ialu_reg );
8711 %}
8712
8713 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8714 predicate(VM_Version::supports_gfni());
8715 match(Set dst (ReverseI src));
8716 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8717 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8718 ins_encode %{
8719 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8720 %}
8721 ins_pipe( ialu_reg );
8722 %}
8723
8724 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8725 predicate(!VM_Version::supports_gfni());
8726 match(Set dst (ReverseL src));
8727 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8728 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8729 ins_encode %{
8730 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8731 %}
8732 ins_pipe( ialu_reg );
8733 %}
8734
8735 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8736 predicate(VM_Version::supports_gfni());
8737 match(Set dst (ReverseL src));
8738 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8739 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8740 ins_encode %{
8741 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8742 %}
8743 ins_pipe( ialu_reg );
8744 %}
8745
8746 //---------- Population Count Instructions -------------------------------------
8747
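// popcnt requires the POPCNT CPU feature, reflected in UsePopCountInstruction.
// When the flag is off the matcher rejects PopCountI/PopCountL nodes entirely,
// so no bit-twiddling fallback is needed in this file.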
8748 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8749 predicate(UsePopCountInstruction);
8750 match(Set dst (PopCountI src));
8751 effect(KILL cr);
8752
8753 format %{ "popcnt $dst, $src" %}
8754 ins_encode %{
8755 __ popcntl($dst$$Register, $src$$Register);
8756 %}
8757 ins_pipe(ialu_reg);
8758 %}
8759
8760 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8761 predicate(UsePopCountInstruction);
8762 match(Set dst (PopCountI (LoadI mem)));
8763 effect(KILL cr);
8764
8765 format %{ "popcnt $dst, $mem" %}
8766 ins_encode %{
8767 __ popcntl($dst$$Register, $mem$$Address);
8768 %}
8769 ins_pipe(ialu_reg);
8770 %}
8771
8772 // Note: Long.bitCount(long) returns an int.
8773 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8774 predicate(UsePopCountInstruction);
8775 match(Set dst (PopCountL src));
8776 effect(KILL cr);
8777
8778 format %{ "popcnt $dst, $src" %}
8779 ins_encode %{
8780 __ popcntq($dst$$Register, $src$$Register);
8781 %}
8782 ins_pipe(ialu_reg);
8783 %}
8784
8785 // Note: Long.bitCount(long) returns an int.
8786 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8787 predicate(UsePopCountInstruction);
8788 match(Set dst (PopCountL (LoadL mem)));
8789 effect(KILL cr);
8790
8791 format %{ "popcnt $dst, $mem" %}
8792 ins_encode %{
8793 __ popcntq($dst$$Register, $mem$$Address);
8794 %}
8795 ins_pipe(ialu_reg);
8796 %}
8797
8798
8799 //----------MemBar Instructions-----------------------------------------------
8800 // Memory barrier flavors
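// x86 is TSO: ordinary loads already have acquire semantics and ordinary
// stores already have release semantics, so the acquire, release and
// storestore barriers below are empty. Only StoreLoad ordering (and the full
// barrier) requires an actual instruction.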
8801
8802 instruct membar_acquire()
8803 %{
8804 match(MemBarAcquire);
8805 match(LoadFence);
8806 ins_cost(0);
8807
8808 size(0);
8809 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8810 ins_encode();
8811 ins_pipe(empty);
8812 %}
8813
8814 instruct membar_acquire_lock()
8815 %{
8816 match(MemBarAcquireLock);
8817 ins_cost(0);
8818
8819 size(0);
8820 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8821 ins_encode();
8822 ins_pipe(empty);
8823 %}
8824
8825 instruct membar_release()
8826 %{
8827 match(MemBarRelease);
8828 match(StoreFence);
8829 ins_cost(0);
8830
8831 size(0);
8832 format %{ "MEMBAR-release ! (empty encoding)" %}
8833 ins_encode();
8834 ins_pipe(empty);
8835 %}
8836
8837 instruct membar_release_lock()
8838 %{
8839 match(MemBarReleaseLock);
8840 ins_cost(0);
8841
8842 size(0);
8843 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8844 ins_encode();
8845 ins_pipe(empty);
8846 %}
8847
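// MacroAssembler::membar(StoreLoad) emits a locked addl of zero to a stack
// slot, which acts as a full fence and is generally cheaper than mfence on
// the chips this port targets.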
8848 instruct membar_storeload(rFlagsReg cr) %{
8849 match(MemBarStoreLoad);
8850 effect(KILL cr);
8851 ins_cost(400);
8852
8853 format %{
8854 $$template
8855 $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
8856 %}
8857 ins_encode %{
8858 __ membar(Assembler::StoreLoad);
8859 %}
8860 ins_pipe(pipe_slow);
8861 %}
8862
8863 instruct membar_volatile(rFlagsReg cr) %{
8864 match(MemBarVolatile);
8865 effect(KILL cr);
8866 ins_cost(400);
8867
8868 format %{
8869 $$template
8870 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8871 %}
8872 ins_encode %{
8873 __ membar(Assembler::StoreLoad);
8874 %}
8875 ins_pipe(pipe_slow);
8876 %}
8877
8878 instruct unnecessary_membar_volatile()
8879 %{
8880 match(MemBarVolatile);
8881 predicate(Matcher::post_store_load_barrier(n));
8882 ins_cost(0);
8883
8884 size(0);
8885 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8886 ins_encode();
8887 ins_pipe(empty);
8888 %}
8889
8890 instruct membar_full(rFlagsReg cr) %{
8891 match(MemBarFull);
8892 effect(KILL cr);
8893 ins_cost(400);
8894
8895 format %{
8896 $$template
8897 $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
8898 %}
8899 ins_encode %{
8900 __ membar(Assembler::StoreLoad);
8901 %}
8902 ins_pipe(pipe_slow);
8903 %}
8904
8905 instruct membar_storestore() %{
8906 match(MemBarStoreStore);
8907 match(StoreStoreFence);
8908 ins_cost(0);
8909
8910 size(0);
8911 format %{ "MEMBAR-storestore (empty encoding)" %}
8912 ins_encode( );
8913 ins_pipe(empty);
8914 %}
8915
8916 //----------Move Instructions--------------------------------------------------
8917
8918 instruct castX2P(rRegP dst, rRegL src)
8919 %{
8920 match(Set dst (CastX2P src));
8921
8922 format %{ "movq $dst, $src\t# long->ptr" %}
8923 ins_encode %{
8924 if ($dst$$reg != $src$$reg) {
8925 __ movptr($dst$$Register, $src$$Register);
8926 }
8927 %}
8928 ins_pipe(ialu_reg_reg); // XXX
8929 %}
8930
8931 instruct castP2X(rRegL dst, rRegP src)
8932 %{
8933 match(Set dst (CastP2X src));
8934
8935 format %{ "movq $dst, $src\t# ptr -> long" %}
8936 ins_encode %{
8937 if ($dst$$reg != $src$$reg) {
8938 __ movptr($dst$$Register, $src$$Register);
8939 }
8940 %}
8941 ins_pipe(ialu_reg_reg); // XXX
8942 %}
8943
8944 // Convert oop into int for vector alignment masking
8945 instruct convP2I(rRegI dst, rRegP src)
8946 %{
8947 match(Set dst (ConvL2I (CastP2X src)));
8948
8949 format %{ "movl $dst, $src\t# ptr -> int" %}
8950 ins_encode %{
8951 __ movl($dst$$Register, $src$$Register);
8952 %}
8953 ins_pipe(ialu_reg_reg); // XXX
8954 %}
8955
8956 // Convert compressed oop into int for vector alignment masking
8957 // in case of 32-bit oops (heap < 4GB).
8958 instruct convN2I(rRegI dst, rRegN src)
8959 %{
8960 predicate(CompressedOops::shift() == 0);
8961 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
8962
8963 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
8964 ins_encode %{
8965 __ movl($dst$$Register, $src$$Register);
8966 %}
8967 ins_pipe(ialu_reg_reg); // XXX
8968 %}
8969
8970 // Convert oop pointer into compressed form
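// Roughly: narrow = (oop == nullptr) ? 0 : (oop - CompressedOops::base()) >> CompressedOops::shift().
// With a zero base and zero shift this degenerates to a plain move; the
// *_not_null variants below skip the null check.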
8971 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
8972 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
8973 match(Set dst (EncodeP src));
8974 effect(KILL cr);
8975 format %{ "encode_heap_oop $dst,$src" %}
8976 ins_encode %{
8977 Register s = $src$$Register;
8978 Register d = $dst$$Register;
8979 if (s != d) {
8980 __ movq(d, s);
8981 }
8982 __ encode_heap_oop(d);
8983 %}
8984 ins_pipe(ialu_reg_long);
8985 %}
8986
8987 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8988 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
8989 match(Set dst (EncodeP src));
8990 effect(KILL cr);
8991 format %{ "encode_heap_oop_not_null $dst,$src" %}
8992 ins_encode %{
8993 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
8994 %}
8995 ins_pipe(ialu_reg_long);
8996 %}
8997
8998 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
8999 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
9000 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
9001 match(Set dst (DecodeN src));
9002 effect(KILL cr);
9003 format %{ "decode_heap_oop $dst,$src" %}
9004 ins_encode %{
9005 Register s = $src$$Register;
9006 Register d = $dst$$Register;
9007 if (s != d) {
9008 __ movq(d, s);
9009 }
9010 __ decode_heap_oop(d);
9011 %}
9012 ins_pipe(ialu_reg_long);
9013 %}
9014
9015 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9016 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
9017 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
9018 match(Set dst (DecodeN src));
9019 effect(KILL cr);
9020 format %{ "decode_heap_oop_not_null $dst,$src" %}
9021 ins_encode %{
9022 Register s = $src$$Register;
9023 Register d = $dst$$Register;
9024 if (s != d) {
9025 __ decode_heap_oop_not_null(d, s);
9026 } else {
9027 __ decode_heap_oop_not_null(d);
9028 }
9029 %}
9030 ins_pipe(ialu_reg_long);
9031 %}
9032
9033 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
9034 match(Set dst (EncodePKlass src));
9035 effect(TEMP dst, KILL cr);
9036 format %{ "encode_and_move_klass_not_null $dst,$src" %}
9037 ins_encode %{
9038 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
9039 %}
9040 ins_pipe(ialu_reg_long);
9041 %}
9042
9043 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9044 match(Set dst (DecodeNKlass src));
9045 effect(TEMP dst, KILL cr);
9046 format %{ "decode_and_move_klass_not_null $dst,$src" %}
9047 ins_encode %{
9048 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
9049 %}
9050 ins_pipe(ialu_reg_long);
9051 %}
9052
9053 //----------Conditional Move---------------------------------------------------
9054 // Jump
9055 // dummy instruction for generating temp registers
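// The jumpXtnd* forms implement the Jump (tableswitch) node: the jump table
// lives in the constant table, $dest is loaded with its base via leaq from
// $constantaddress, and dispatch is an indirect jmp indexed by the switch value.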
9056 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
9057 match(Jump (LShiftL switch_val shift));
9058 ins_cost(350);
9059 predicate(false);
9060 effect(TEMP dest);
9061
9062 format %{ "leaq $dest, [$constantaddress]\n\t"
9063 "jmp [$dest + $switch_val << $shift]\n\t" %}
9064 ins_encode %{
9065 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9066 // to do that and the compiler is using that register as one it can allocate.
9067 // So we build it all by hand.
9068 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
9069 // ArrayAddress dispatch(table, index);
9070 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
9071 __ lea($dest$$Register, $constantaddress);
9072 __ jmp(dispatch);
9073 %}
9074 ins_pipe(pipe_jmp);
9075 %}
9076
9077 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
9078 match(Jump (AddL (LShiftL switch_val shift) offset));
9079 ins_cost(350);
9080 effect(TEMP dest);
9081
9082 format %{ "leaq $dest, [$constantaddress]\n\t"
9083 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
9084 ins_encode %{
9085 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9086 // to do that and the compiler is using that register as one it can allocate.
9087 // So we build it all by hand.
9088 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9089 // ArrayAddress dispatch(table, index);
9090 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9091 __ lea($dest$$Register, $constantaddress);
9092 __ jmp(dispatch);
9093 %}
9094 ins_pipe(pipe_jmp);
9095 %}
9096
9097 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
9098 match(Jump switch_val);
9099 ins_cost(350);
9100 effect(TEMP dest);
9101
9102 format %{ "leaq $dest, [$constantaddress]\n\t"
9103 "jmp [$dest + $switch_val]\n\t" %}
9104 ins_encode %{
9105 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9106 // to do that and the compiler is using that register as one it can allocate.
9107 // So we build it all by hand.
9108 // Address index(noreg, switch_reg, Address::times_1);
9109 // ArrayAddress dispatch(table, index);
9110 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
9111 __ lea($dest$$Register, $constantaddress);
9112 __ jmp(dispatch);
9113 %}
9114 ins_pipe(pipe_jmp);
9115 %}
9116
9117 // Conditional move
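// When the two arms of the CMove are the constants 1 and 0, the select can be
// materialized with a single setb of the negated condition instead of loading
// a constant and doing a cmov; that is what the *_imm_01 forms below do.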
9118 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
9119 %{
9120 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9121 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9122
9123 ins_cost(100); // XXX
9124 format %{ "setbn$cop $dst\t# signed, int" %}
9125 ins_encode %{
9126 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9127 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9128 %}
9129 ins_pipe(ialu_reg);
9130 %}
9131
9132 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
9133 %{
9134 predicate(!UseAPX);
9135 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9136
9137 ins_cost(200); // XXX
9138 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9139 ins_encode %{
9140 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9141 %}
9142 ins_pipe(pipe_cmov_reg);
9143 %}
9144
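// The *_ndd forms use the Intel APX new-data-destination (EVEX) encodings when
// UseAPX is set: ecmovl/eaddl/... write an explicit destination rather than
// destroying the first source, so all three operands may be distinct registers.
// The Flag_ndd_demotable_opr1/opr2 hints on the arithmetic forms appear to mark
// operands that may alias dst, allowing demotion back to the shorter legacy
// two-operand encoding.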
9145 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
9146 %{
9147 predicate(UseAPX);
9148 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9149
9150 ins_cost(200);
9151 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9152 ins_encode %{
9153 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9154 %}
9155 ins_pipe(pipe_cmov_reg);
9156 %}
9157
9158 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
9159 %{
9160 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9161 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9162
9163 ins_cost(100); // XXX
9164 format %{ "setbn$cop $dst\t# unsigned, int" %}
9165 ins_encode %{
9166 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9167 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9168 %}
9169 ins_pipe(ialu_reg);
9170 %}
9171
9172 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9173 predicate(!UseAPX);
9174 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9175
9176 ins_cost(200); // XXX
9177 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9178 ins_encode %{
9179 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9180 %}
9181 ins_pipe(pipe_cmov_reg);
9182 %}
9183
9184 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9185 predicate(UseAPX);
9186 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9187
9188 ins_cost(200);
9189 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9190 ins_encode %{
9191 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9192 %}
9193 ins_pipe(pipe_cmov_reg);
9194 %}
9195
9196 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9197 %{
9198 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9199 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9200
9201 ins_cost(100); // XXX
9202 format %{ "setbn$cop $dst\t# unsigned, int" %}
9203 ins_encode %{
9204 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9205 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9206 %}
9207 ins_pipe(ialu_reg);
9208 %}
9209
9210 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9211 %{
9212 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9213 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9214
9215 ins_cost(100); // XXX
9216 format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
9217 ins_encode %{
9218 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9219 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9220 %}
9221 ins_pipe(ialu_reg);
9222 %}
9223
9224 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9225 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9226
9227 ins_cost(200);
9228 expand %{
9229 cmovI_regU(cop, cr, dst, src);
9230 %}
9231 %}
9232
9233 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
9234 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9235
9236 ins_cost(200);
9237 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9238 ins_encode %{
9239 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9240 %}
9241 ins_pipe(pipe_cmov_reg);
9242 %}
9243
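// ucomiss/ucomisd report an unordered result (a NaN operand) by setting the
// parity flag. For a != test the unordered case must also select $src, hence
// the pair of cmovs below: one on parity, one on not-equal.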
9244 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9245 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9246 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9247
9248 ins_cost(200); // XXX
9249 format %{ "cmovpl $dst, $src\n\t"
9250 "cmovnel $dst, $src" %}
9251 ins_encode %{
9252 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9253 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9254 %}
9255 ins_pipe(pipe_cmov_reg);
9256 %}
9257
9258 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9259 // inputs of the CMove
9260 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9261 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9262 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9263 effect(TEMP dst);
9264
9265 ins_cost(200); // XXX
9266 format %{ "cmovpl $dst, $src\n\t"
9267 "cmovnel $dst, $src" %}
9268 ins_encode %{
9269 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9270 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9271 %}
9272 ins_pipe(pipe_cmov_reg);
9273 %}
9274
9275 // Conditional move
9276 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9277 predicate(!UseAPX);
9278 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9279
9280 ins_cost(250); // XXX
9281 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9282 ins_encode %{
9283 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9284 %}
9285 ins_pipe(pipe_cmov_mem);
9286 %}
9287
9288 // Conditional move
9289 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9290 %{
9291 predicate(UseAPX);
9292 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9293
9294 ins_cost(250);
9295 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9296 ins_encode %{
9297 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9298 %}
9299 ins_pipe(pipe_cmov_mem);
9300 %}
9301
9302 // Conditional move
9303 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9304 %{
9305 predicate(!UseAPX);
9306 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9307
9308 ins_cost(250); // XXX
9309 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9310 ins_encode %{
9311 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9312 %}
9313 ins_pipe(pipe_cmov_mem);
9314 %}
9315
9316 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9317 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9318
9319 ins_cost(250);
9320 expand %{
9321 cmovI_memU(cop, cr, dst, src);
9322 %}
9323 %}
9324
9325 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9326 %{
9327 predicate(UseAPX);
9328 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9329
9330 ins_cost(250);
9331 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9332 ins_encode %{
9333 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9334 %}
9335 ins_pipe(pipe_cmov_mem);
9336 %}
9337
9338 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
9339 %{
9340 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9341
9342 ins_cost(250);
9343 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9344 ins_encode %{
9345 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9346 %}
9347 ins_pipe(pipe_cmov_mem);
9348 %}
9349
9350 // Conditional move
9351 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9352 %{
9353 predicate(!UseAPX);
9354 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9355
9356 ins_cost(200); // XXX
9357 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9358 ins_encode %{
9359 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9360 %}
9361 ins_pipe(pipe_cmov_reg);
9362 %}
9363
9364 // Conditional move ndd
9365 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9366 %{
9367 predicate(UseAPX);
9368 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9369
9370 ins_cost(200);
9371 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9372 ins_encode %{
9373 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9374 %}
9375 ins_pipe(pipe_cmov_reg);
9376 %}
9377
9378 // Conditional move
9379 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9380 %{
9381 predicate(!UseAPX);
9382 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9383
9384 ins_cost(200); // XXX
9385 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9386 ins_encode %{
9387 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9388 %}
9389 ins_pipe(pipe_cmov_reg);
9390 %}
9391
9392 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9393 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9394
9395 ins_cost(200);
9396 expand %{
9397 cmovN_regU(cop, cr, dst, src);
9398 %}
9399 %}
9400
9401 // Conditional move ndd
9402 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9403 %{
9404 predicate(UseAPX);
9405 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9406
9407 ins_cost(200);
9408 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9409 ins_encode %{
9410 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9411 %}
9412 ins_pipe(pipe_cmov_reg);
9413 %}
9414
9415 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
9416 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9417
9418 ins_cost(200);
9419 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
9420 ins_encode %{
9421 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9422 %}
9423 ins_pipe(pipe_cmov_reg);
9424 %}
9425
9426 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9427 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9428 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9429
9430 ins_cost(200); // XXX
9431 format %{ "cmovpl $dst, $src\n\t"
9432 "cmovnel $dst, $src" %}
9433 ins_encode %{
9434 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9435 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9436 %}
9437 ins_pipe(pipe_cmov_reg);
9438 %}
9439
9440 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9441 // inputs of the CMove
9442 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9443 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9444 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9445
9446 ins_cost(200); // XXX
9447 format %{ "cmovpl $dst, $src\n\t"
9448 "cmovnel $dst, $src" %}
9449 ins_encode %{
9450 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9451 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9452 %}
9453 ins_pipe(pipe_cmov_reg);
9454 %}
9455
9456 // Conditional move
9457 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9458 %{
9459 predicate(!UseAPX);
9460 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9461
9462 ins_cost(200); // XXX
9463 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9464 ins_encode %{
9465 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9466 %}
9467 ins_pipe(pipe_cmov_reg); // XXX
9468 %}
9469
9470 // Conditional move ndd
9471 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9472 %{
9473 predicate(UseAPX);
9474 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9475
9476 ins_cost(200);
9477 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9478 ins_encode %{
9479 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9480 %}
9481 ins_pipe(pipe_cmov_reg);
9482 %}
9483
9484 // Conditional move
9485 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9486 %{
9487 predicate(!UseAPX);
9488 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9489
9490 ins_cost(200); // XXX
9491 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9492 ins_encode %{
9493 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9494 %}
9495 ins_pipe(pipe_cmov_reg); // XXX
9496 %}
9497
9498 // Conditional move ndd
9499 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9500 %{
9501 predicate(UseAPX);
9502 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9503
9504 ins_cost(200);
9505 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9506 ins_encode %{
9507 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9508 %}
9509 ins_pipe(pipe_cmov_reg);
9510 %}
9511
9512 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9513 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9514
9515 ins_cost(200);
9516 expand %{
9517 cmovP_regU(cop, cr, dst, src);
9518 %}
9519 %}
9520
9521 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
9522 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9523
9524 ins_cost(200);
9525 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
9526 ins_encode %{
9527 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9528 %}
9529 ins_pipe(pipe_cmov_reg);
9530 %}
9531
9532 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9533 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9534 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9535
9536 ins_cost(200); // XXX
9537 format %{ "cmovpq $dst, $src\n\t"
9538 "cmovneq $dst, $src" %}
9539 ins_encode %{
9540 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9541 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9542 %}
9543 ins_pipe(pipe_cmov_reg);
9544 %}
9545
9546 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9547 // inputs of the CMove
9548 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9549 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9550 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9551
9552 ins_cost(200); // XXX
9553 format %{ "cmovpq $dst, $src\n\t"
9554 "cmovneq $dst, $src" %}
9555 ins_encode %{
9556 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9557 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9558 %}
9559 ins_pipe(pipe_cmov_reg);
9560 %}
9561
9562 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9563 %{
9564 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9565 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9566
9567 ins_cost(100); // XXX
9568 format %{ "setbn$cop $dst\t# signed, long" %}
9569 ins_encode %{
9570 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9571 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9572 %}
9573 ins_pipe(ialu_reg);
9574 %}
9575
9576 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9577 %{
9578 predicate(!UseAPX);
9579 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9580
9581 ins_cost(200); // XXX
9582 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9583 ins_encode %{
9584 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9585 %}
9586 ins_pipe(pipe_cmov_reg); // XXX
9587 %}
9588
9589 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9590 %{
9591 predicate(UseAPX);
9592 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9593
9594 ins_cost(200);
9595 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9596 ins_encode %{
9597 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9598 %}
9599 ins_pipe(pipe_cmov_reg);
9600 %}
9601
9602 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9603 %{
9604 predicate(!UseAPX);
9605 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9606
9607 ins_cost(200); // XXX
9608 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9609 ins_encode %{
9610 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9611 %}
9612 ins_pipe(pipe_cmov_mem); // XXX
9613 %}
9614
9615 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9616 %{
9617 predicate(UseAPX);
9618 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9619
9620 ins_cost(200);
9621 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9622 ins_encode %{
9623 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9624 %}
9625 ins_pipe(pipe_cmov_mem);
9626 %}
9627
9628 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9629 %{
9630 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9631 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9632
9633 ins_cost(100); // XXX
9634 format %{ "setbn$cop $dst\t# unsigned, long" %}
9635 ins_encode %{
9636 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9637 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9638 %}
9639 ins_pipe(ialu_reg);
9640 %}
9641
9642 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9643 %{
9644 predicate(!UseAPX);
9645 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9646
9647 ins_cost(200); // XXX
9648 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9649 ins_encode %{
9650 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9651 %}
9652 ins_pipe(pipe_cmov_reg); // XXX
9653 %}
9654
9655 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9656 %{
9657 predicate(UseAPX);
9658 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9659
9660 ins_cost(200);
9661 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9662 ins_encode %{
9663 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9664 %}
9665 ins_pipe(pipe_cmov_reg);
9666 %}
9667
9668 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9669 %{
9670 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9671 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9672
9673 ins_cost(100); // XXX
9674 format %{ "setbn$cop $dst\t# unsigned, long" %}
9675 ins_encode %{
9676 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9677 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9678 %}
9679 ins_pipe(ialu_reg);
9680 %}
9681
9682 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9683 %{
9684 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9685 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9686
9687 ins_cost(100); // XXX
9688 format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
9689 ins_encode %{
9690 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9691 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9692 %}
9693 ins_pipe(ialu_reg);
9694 %}
9695
9696 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9697 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9698
9699 ins_cost(200);
9700 expand %{
9701 cmovL_regU(cop, cr, dst, src);
9702 %}
9703 %}
9704
9705 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
9706 %{
9707 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9708
9709 ins_cost(200);
9710 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
9711 ins_encode %{
9712 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9713 %}
9714 ins_pipe(pipe_cmov_reg);
9715 %}
9716
9717 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9718 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9719 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9720
9721 ins_cost(200); // XXX
9722 format %{ "cmovpq $dst, $src\n\t"
9723 "cmovneq $dst, $src" %}
9724 ins_encode %{
9725 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9726 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9727 %}
9728 ins_pipe(pipe_cmov_reg);
9729 %}
9730
9731 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9732 // inputs of the CMove
9733 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9734 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9735 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9736
9737 ins_cost(200); // XXX
9738 format %{ "cmovpq $dst, $src\n\t"
9739 "cmovneq $dst, $src" %}
9740 ins_encode %{
9741 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9742 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9743 %}
9744 ins_pipe(pipe_cmov_reg);
9745 %}
9746
9747 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
9748 %{
9749 predicate(!UseAPX);
9750 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9751
9752 ins_cost(200); // XXX
9753 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9754 ins_encode %{
9755 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9756 %}
9757 ins_pipe(pipe_cmov_mem); // XXX
9758 %}
9759
9760 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
9761 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9762
9763 ins_cost(200);
9764 expand %{
9765 cmovL_memU(cop, cr, dst, src);
9766 %}
9767 %}
9768
9769 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
9770 %{
9771 predicate(UseAPX);
9772 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9773
9774 ins_cost(200);
9775 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9776 ins_encode %{
9777 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9778 %}
9779 ins_pipe(pipe_cmov_mem);
9780 %}
9781
9782 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
9783 %{
9784 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9785
9786 ins_cost(200);
9787 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
9788 ins_encode %{
9789 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9790 %}
9791 ins_pipe(pipe_cmov_mem);
9792 %}
9793
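// There is no conditional move into an XMM register, so a float/double CMove
// is emitted as a short branch on the inverted condition around a plain
// register-to-register move.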
9794 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
9795 %{
9796 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9797
9798 ins_cost(200); // XXX
9799 format %{ "jn$cop skip\t# signed cmove float\n\t"
9800 "movss $dst, $src\n"
9801 "skip:" %}
9802 ins_encode %{
9803 Label Lskip;
9804 // Invert sense of branch from sense of CMOV
9805 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9806 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9807 __ bind(Lskip);
9808 %}
9809 ins_pipe(pipe_slow);
9810 %}
9811
9812 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
9813 %{
9814 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9815
9816 ins_cost(200); // XXX
9817 format %{ "jn$cop skip\t# unsigned cmove float\n\t"
9818 "movss $dst, $src\n"
9819 "skip:" %}
9820 ins_encode %{
9821 Label Lskip;
9822 // Invert sense of branch from sense of CMOV
9823 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9824 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9825 __ bind(Lskip);
9826 %}
9827 ins_pipe(pipe_slow);
9828 %}
9829
9830 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
9831 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9832
9833 ins_cost(200);
9834 expand %{
9835 cmovF_regU(cop, cr, dst, src);
9836 %}
9837 %}
9838
9839 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
9840 %{
9841 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9842
9843 ins_cost(200); // XXX
9844 format %{ "jn$cop skip\t# signed, unsigned cmove float\n\t"
9845 "movss $dst, $src\n"
9846 "skip:" %}
9847 ins_encode %{
9848 Label Lskip;
9849 // Invert sense of branch from sense of CMOV
9850 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9851 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9852 __ bind(Lskip);
9853 %}
9854 ins_pipe(pipe_slow);
9855 %}
9856
9857 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
9858 %{
9859 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9860
9861 ins_cost(200); // XXX
9862 format %{ "jn$cop skip\t# signed cmove double\n\t"
9863 "movsd $dst, $src\n"
9864 "skip:" %}
9865 ins_encode %{
9866 Label Lskip;
9867 // Invert sense of branch from sense of CMOV
9868 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9869 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9870 __ bind(Lskip);
9871 %}
9872 ins_pipe(pipe_slow);
9873 %}
9874
9875 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
9876 %{
9877 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9878
9879 ins_cost(200); // XXX
9880 format %{ "jn$cop skip\t# unsigned cmove double\n\t"
9881 "movsd $dst, $src\n"
9882 "skip:" %}
9883 ins_encode %{
9884 Label Lskip;
9885 // Invert sense of branch from sense of CMOV
9886 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9887 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9888 __ bind(Lskip);
9889 %}
9890 ins_pipe(pipe_slow);
9891 %}
9892
9893 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
9894 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9895
9896 ins_cost(200);
9897 expand %{
9898 cmovD_regU(cop, cr, dst, src);
9899 %}
9900 %}
9901
9902 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
9903 %{
9904 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9905
9906 ins_cost(200); // XXX
9907 format %{ "jn$cop skip\t# signed, unsigned cmove double\n\t"
9908 "movsd $dst, $src\n"
9909 "skip:" %}
9910 ins_encode %{
9911 Label Lskip;
9912 // Invert sense of branch from sense of CMOV
9913 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9914 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9915 __ bind(Lskip);
9916 %}
9917 ins_pipe(pipe_slow);
9918 %}
9919
9920 //----------Arithmetic Instructions--------------------------------------------
9921 //----------Addition Instructions----------------------------------------------
9922
9923 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9924 %{
9925 predicate(!UseAPX);
9926 match(Set dst (AddI dst src));
9927 effect(KILL cr);
9928 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9929 format %{ "addl $dst, $src\t# int" %}
9930 ins_encode %{
9931 __ addl($dst$$Register, $src$$Register);
9932 %}
9933 ins_pipe(ialu_reg_reg);
9934 %}
9935
9936 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
9937 %{
9938 predicate(UseAPX);
9939 match(Set dst (AddI src1 src2));
9940 effect(KILL cr);
9941 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
9942
9943 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9944 ins_encode %{
9945 __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
9946 %}
9947 ins_pipe(ialu_reg_reg);
9948 %}
9949
9950 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9951 %{
9952 predicate(!UseAPX);
9953 match(Set dst (AddI dst src));
9954 effect(KILL cr);
9955 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9956
9957 format %{ "addl $dst, $src\t# int" %}
9958 ins_encode %{
9959 __ addl($dst$$Register, $src$$constant);
9960 %}
9961 ins_pipe( ialu_reg );
9962 %}
9963
9964 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
9965 %{
9966 predicate(UseAPX);
9967 match(Set dst (AddI src1 src2));
9968 effect(KILL cr);
9969 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
9970
9971 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9972 ins_encode %{
9973 __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
9974 %}
9975 ins_pipe( ialu_reg );
9976 %}
9977
9978 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
9979 %{
9980 predicate(UseAPX);
9981 match(Set dst (AddI (LoadI src1) src2));
9982 effect(KILL cr);
9983 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9984
9985 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9986 ins_encode %{
9987 __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
9988 %}
9989 ins_pipe( ialu_reg );
9990 %}
9991
9992 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9993 %{
9994 predicate(!UseAPX);
9995 match(Set dst (AddI dst (LoadI src)));
9996 effect(KILL cr);
9997 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9998
9999 ins_cost(150); // XXX
10000 format %{ "addl $dst, $src\t# int" %}
10001 ins_encode %{
10002 __ addl($dst$$Register, $src$$Address);
10003 %}
10004 ins_pipe(ialu_reg_mem);
10005 %}
10006
10007 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10008 %{
10009 predicate(UseAPX);
10010 match(Set dst (AddI src1 (LoadI src2)));
10011 effect(KILL cr);
10012 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10013
10014 ins_cost(150);
10015 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10016 ins_encode %{
10017 __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10018 %}
10019 ins_pipe(ialu_reg_mem);
10020 %}
10021
10022 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10023 %{
10024 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10025 effect(KILL cr);
10026 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10027
10028 ins_cost(150); // XXX
10029 format %{ "addl $dst, $src\t# int" %}
10030 ins_encode %{
10031 __ addl($dst$$Address, $src$$Register);
10032 %}
10033 ins_pipe(ialu_mem_reg);
10034 %}
10035
10036 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10037 %{
10038 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10039 effect(KILL cr);
10040 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10041
10042
10043 ins_cost(125); // XXX
10044 format %{ "addl $dst, $src\t# int" %}
10045 ins_encode %{
10046 __ addl($dst$$Address, $src$$constant);
10047 %}
10048 ins_pipe(ialu_mem_imm);
10049 %}
10050
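// incl/decl are shorter than addl/subl with an immediate but leave CF
// untouched, which can cost a partial-flags merge on some microarchitectures;
// UseIncDec selects between the two encodings.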
10051 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10052 %{
10053 predicate(!UseAPX && UseIncDec);
10054 match(Set dst (AddI dst src));
10055 effect(KILL cr);
10056
10057 format %{ "incl $dst\t# int" %}
10058 ins_encode %{
10059 __ incrementl($dst$$Register);
10060 %}
10061 ins_pipe(ialu_reg);
10062 %}
10063
10064 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10065 %{
10066 predicate(UseAPX && UseIncDec);
10067 match(Set dst (AddI src val));
10068 effect(KILL cr);
10069 flag(PD::Flag_ndd_demotable_opr1);
10070
10071 format %{ "eincl $dst, $src\t# int ndd" %}
10072 ins_encode %{
10073 __ eincl($dst$$Register, $src$$Register, false);
10074 %}
10075 ins_pipe(ialu_reg);
10076 %}
10077
10078 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10079 %{
10080 predicate(UseAPX && UseIncDec);
10081 match(Set dst (AddI (LoadI src) val));
10082 effect(KILL cr);
10083
10084 format %{ "eincl $dst, $src\t# int ndd" %}
10085 ins_encode %{
10086 __ eincl($dst$$Register, $src$$Address, false);
10087 %}
10088 ins_pipe(ialu_reg);
10089 %}
10090
10091 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10092 %{
10093 predicate(UseIncDec);
10094 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10095 effect(KILL cr);
10096
10097 ins_cost(125); // XXX
10098 format %{ "incl $dst\t# int" %}
10099 ins_encode %{
10100 __ incrementl($dst$$Address);
10101 %}
10102 ins_pipe(ialu_mem_imm);
10103 %}
10104
10105 // XXX why does that use AddI
10106 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10107 %{
10108 predicate(!UseAPX && UseIncDec);
10109 match(Set dst (AddI dst src));
10110 effect(KILL cr);
10111
10112 format %{ "decl $dst\t# int" %}
10113 ins_encode %{
10114 __ decrementl($dst$$Register);
10115 %}
10116 ins_pipe(ialu_reg);
10117 %}
10118
10119 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10120 %{
10121 predicate(UseAPX && UseIncDec);
10122 match(Set dst (AddI src val));
10123 effect(KILL cr);
10124 flag(PD::Flag_ndd_demotable_opr1);
10125
10126 format %{ "edecl $dst, $src\t# int ndd" %}
10127 ins_encode %{
10128 __ edecl($dst$$Register, $src$$Register, false);
10129 %}
10130 ins_pipe(ialu_reg);
10131 %}
10132
10133 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10134 %{
10135 predicate(UseAPX && UseIncDec);
10136 match(Set dst (AddI (LoadI src) val));
10137 effect(KILL cr);
10138
10139 format %{ "edecl $dst, $src\t# int ndd" %}
10140 ins_encode %{
10141 __ edecl($dst$$Register, $src$$Address, false);
10142 %}
10143 ins_pipe(ialu_reg);
10144 %}
10145
10146 // XXX why does that use AddI
10147 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10148 %{
10149 predicate(UseIncDec);
10150 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10151 effect(KILL cr);
10152
10153 ins_cost(125); // XXX
10154 format %{ "decl $dst\t# int" %}
10155 ins_encode %{
10156 __ decrementl($dst$$Address);
10157 %}
10158 ins_pipe(ialu_mem_imm);
10159 %}
10160
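// lea folds base + index*scale + disp into a single instruction and does not
// write the flags, so no rFlagsReg is killed. These forms are only used where
// VM_Version reports 2-operand/3-operand lea as fast.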
10161 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10162 %{
10163 predicate(VM_Version::supports_fast_2op_lea());
10164 match(Set dst (AddI (LShiftI index scale) disp));
10165
10166 format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10167 ins_encode %{
10168 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10169 __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10170 %}
10171 ins_pipe(ialu_reg_reg);
10172 %}
10173
10174 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10175 %{
10176 predicate(VM_Version::supports_fast_3op_lea());
10177 match(Set dst (AddI (AddI base index) disp));
10178
10179 format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10180 ins_encode %{
10181 __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10182 %}
10183 ins_pipe(ialu_reg_reg);
10184 %}
10185
10186 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10187 %{
10188 predicate(VM_Version::supports_fast_2op_lea());
10189 match(Set dst (AddI base (LShiftI index scale)));
10190
10191 format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10192 ins_encode %{
10193 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10194 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10195 %}
10196 ins_pipe(ialu_reg_reg);
10197 %}
10198
10199 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10200 %{
10201 predicate(VM_Version::supports_fast_3op_lea());
10202 match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10203
10204 format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10205 ins_encode %{
10206 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10207 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10208 %}
10209 ins_pipe(ialu_reg_reg);
10210 %}
10211
10212 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10213 %{
10214 predicate(!UseAPX);
10215 match(Set dst (AddL dst src));
10216 effect(KILL cr);
10217 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10218
10219 format %{ "addq $dst, $src\t# long" %}
10220 ins_encode %{
10221 __ addq($dst$$Register, $src$$Register);
10222 %}
10223 ins_pipe(ialu_reg_reg);
10224 %}
10225
10226 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10227 %{
10228 predicate(UseAPX);
10229 match(Set dst (AddL src1 src2));
10230 effect(KILL cr);
10231 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10232
10233 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10234 ins_encode %{
10235 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10236 %}
10237 ins_pipe(ialu_reg_reg);
10238 %}
10239
10240 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10241 %{
10242 predicate(!UseAPX);
10243 match(Set dst (AddL dst src));
10244 effect(KILL cr);
10245 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10246
10247 format %{ "addq $dst, $src\t# long" %}
10248 ins_encode %{
10249 __ addq($dst$$Register, $src$$constant);
10250 %}
10251 ins_pipe( ialu_reg );
10252 %}
10253
10254 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10255 %{
10256 predicate(UseAPX);
10257 match(Set dst (AddL src1 src2));
10258 effect(KILL cr);
10259 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10260
10261 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10262 ins_encode %{
10263 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10264 %}
10265 ins_pipe( ialu_reg );
10266 %}
10267
10268 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10269 %{
10270 predicate(UseAPX);
10271 match(Set dst (AddL (LoadL src1) src2));
10272 effect(KILL cr);
10273 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10274
10275 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10276 ins_encode %{
10277 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10278 %}
10279 ins_pipe( ialu_reg );
10280 %}
10281
10282 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10283 %{
10284 predicate(!UseAPX);
10285 match(Set dst (AddL dst (LoadL src)));
10286 effect(KILL cr);
10287 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10288
10289 ins_cost(150); // XXX
10290 format %{ "addq $dst, $src\t# long" %}
10291 ins_encode %{
10292 __ addq($dst$$Register, $src$$Address);
10293 %}
10294 ins_pipe(ialu_reg_mem);
10295 %}
10296
10297 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10298 %{
10299 predicate(UseAPX);
10300 match(Set dst (AddL src1 (LoadL src2)));
10301 effect(KILL cr);
10302 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10303
10304 ins_cost(150);
10305 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10306 ins_encode %{
10307 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10308 %}
10309 ins_pipe(ialu_reg_mem);
10310 %}
10311
10312 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10313 %{
10314 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10315 effect(KILL cr);
10316 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10317
10318 ins_cost(150); // XXX
10319 format %{ "addq $dst, $src\t# long" %}
10320 ins_encode %{
10321 __ addq($dst$$Address, $src$$Register);
10322 %}
10323 ins_pipe(ialu_mem_reg);
10324 %}
10325
10326 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10327 %{
10328 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10329 effect(KILL cr);
10330 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10331
10332 ins_cost(125); // XXX
10333 format %{ "addq $dst, $src\t# long" %}
10334 ins_encode %{
10335 __ addq($dst$$Address, $src$$constant);
10336 %}
10337 ins_pipe(ialu_mem_imm);
10338 %}
10339
10340 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10341 %{
10342 predicate(!UseAPX && UseIncDec);
10343 match(Set dst (AddL dst src));
10344 effect(KILL cr);
10345
10346 format %{ "incq $dst\t# long" %}
10347 ins_encode %{
10348 __ incrementq($dst$$Register);
10349 %}
10350 ins_pipe(ialu_reg);
10351 %}
10352
10353 instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10354 %{
10355 predicate(UseAPX && UseIncDec);
10356 match(Set dst (AddL src val));
10357 effect(KILL cr);
10358 flag(PD::Flag_ndd_demotable_opr1);
10359
10360 format %{ "eincq $dst, $src\t# long ndd" %}
10361 ins_encode %{
10362 __ eincq($dst$$Register, $src$$Register, false);
10363 %}
10364 ins_pipe(ialu_reg);
10365 %}
10366
10367 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10368 %{
10369 predicate(UseAPX && UseIncDec);
10370 match(Set dst (AddL (LoadL src) val));
10371 effect(KILL cr);
10372
10373 format %{ "eincq $dst, $src\t# long ndd" %}
10374 ins_encode %{
10375 __ eincq($dst$$Register, $src$$Address, false);
10376 %}
10377 ins_pipe(ialu_reg);
10378 %}
10379
10380 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10381 %{
10382 predicate(UseIncDec);
10383 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10384 effect(KILL cr);
10385
10386 ins_cost(125); // XXX
10387 format %{ "incq $dst\t# long" %}
10388 ins_encode %{
10389 __ incrementq($dst$$Address);
10390 %}
10391 ins_pipe(ialu_mem_imm);
10392 %}
10393
10394 // XXX why does that use AddL
10395 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10396 %{
10397 predicate(!UseAPX && UseIncDec);
10398 match(Set dst (AddL dst src));
10399 effect(KILL cr);
10400
10401 format %{ "decq $dst\t# long" %}
10402 ins_encode %{
10403 __ decrementq($dst$$Register);
10404 %}
10405 ins_pipe(ialu_reg);
10406 %}
10407
10408 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10409 %{
10410 predicate(UseAPX && UseIncDec);
10411 match(Set dst (AddL src val));
10412 effect(KILL cr);
10413 flag(PD::Flag_ndd_demotable_opr1);
10414
10415 format %{ "edecq $dst, $src\t# long ndd" %}
10416 ins_encode %{
10417 __ edecq($dst$$Register, $src$$Register, false);
10418 %}
10419 ins_pipe(ialu_reg);
10420 %}
10421
10422 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10423 %{
10424 predicate(UseAPX && UseIncDec);
10425 match(Set dst (AddL (LoadL src) val));
10426 effect(KILL cr);
10427
10428 format %{ "edecq $dst, $src\t# long ndd" %}
10429 ins_encode %{
10430 __ edecq($dst$$Register, $src$$Address, false);
10431 %}
10432 ins_pipe(ialu_reg);
10433 %}
10434
10435 // XXX why does that use AddL
10436 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10437 %{
10438 predicate(UseIncDec);
10439 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10440 effect(KILL cr);
10441
10442 ins_cost(125); // XXX
10443 format %{ "decq $dst\t# long" %}
10444 ins_encode %{
10445 __ decrementq($dst$$Address);
10446 %}
10447 ins_pipe(ialu_mem_imm);
10448 %}
10449
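// Compound lea forms.  These are gated on VM_Version::supports_fast_2op_lea()
// / supports_fast_3op_lea(), since lea with several components (base, scaled
// index, displacement) is slower than separate add/shift on some CPUs.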
10450 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10451 %{
10452 predicate(VM_Version::supports_fast_2op_lea());
10453 match(Set dst (AddL (LShiftL index scale) disp));
10454
10455 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10456 ins_encode %{
10457 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10458 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10459 %}
10460 ins_pipe(ialu_reg_reg);
10461 %}
10462
10463 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10464 %{
10465 predicate(VM_Version::supports_fast_3op_lea());
10466 match(Set dst (AddL (AddL base index) disp));
10467
10468 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10469 ins_encode %{
10470 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10471 %}
10472 ins_pipe(ialu_reg_reg);
10473 %}
10474
10475 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10476 %{
10477 predicate(VM_Version::supports_fast_2op_lea());
10478 match(Set dst (AddL base (LShiftL index scale)));
10479
10480 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10481 ins_encode %{
10482 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10483 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10484 %}
10485 ins_pipe(ialu_reg_reg);
10486 %}
10487
10488 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10489 %{
10490 predicate(VM_Version::supports_fast_3op_lea());
10491 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10492
10493 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10494 ins_encode %{
10495 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10496 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10497 %}
10498 ins_pipe(ialu_reg_reg);
10499 %}
10500
10501 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10502 %{
10503 match(Set dst (AddP dst src));
10504 effect(KILL cr);
10505 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10506
10507 format %{ "addq $dst, $src\t# ptr" %}
10508 ins_encode %{
10509 __ addq($dst$$Register, $src$$Register);
10510 %}
10511 ins_pipe(ialu_reg_reg);
10512 %}
10513
10514 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10515 %{
10516 match(Set dst (AddP dst src));
10517 effect(KILL cr);
10518 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10519
10520 format %{ "addq $dst, $src\t# ptr" %}
10521 ins_encode %{
10522 __ addq($dst$$Register, $src$$constant);
10523 %}
10524 ins_pipe( ialu_reg );
10525 %}
10526
10527 // XXX addP mem ops ????
10528
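// Cast nodes exist to carry sharpened type information for the optimizer.
// The unchecked variants emit no code (size(0)); when VerifyConstraintCasts
// is enabled, the checked CastII/CastLL variants verify at runtime that the
// value lies within the node's declared range.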
10529 instruct checkCastPP(rRegP dst)
10530 %{
10531 match(Set dst (CheckCastPP dst));
10532
10533 size(0);
10534 format %{ "# checkcastPP of $dst" %}
10535 ins_encode(/* empty encoding */);
10536 ins_pipe(empty);
10537 %}
10538
10539 instruct castPP(rRegP dst)
10540 %{
10541 match(Set dst (CastPP dst));
10542
10543 size(0);
10544 format %{ "# castPP of $dst" %}
10545 ins_encode(/* empty encoding */);
10546 ins_pipe(empty);
10547 %}
10548
10549 instruct castII(rRegI dst)
10550 %{
10551 predicate(VerifyConstraintCasts == 0);
10552 match(Set dst (CastII dst));
10553
10554 size(0);
10555 format %{ "# castII of $dst" %}
10556 ins_encode(/* empty encoding */);
10557 ins_cost(0);
10558 ins_pipe(empty);
10559 %}
10560
10561 instruct castII_checked(rRegI dst, rFlagsReg cr)
10562 %{
10563 predicate(VerifyConstraintCasts > 0);
10564 match(Set dst (CastII dst));
10565
10566 effect(KILL cr);
10567 format %{ "# cast_checked_II $dst" %}
10568 ins_encode %{
10569 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10570 %}
10571 ins_pipe(pipe_slow);
10572 %}
10573
10574 instruct castLL(rRegL dst)
10575 %{
10576 predicate(VerifyConstraintCasts == 0);
10577 match(Set dst (CastLL dst));
10578
10579 size(0);
10580 format %{ "# castLL of $dst" %}
10581 ins_encode(/* empty encoding */);
10582 ins_cost(0);
10583 ins_pipe(empty);
10584 %}
10585
10586 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10587 %{
10588 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10589 match(Set dst (CastLL dst));
10590
10591 effect(KILL cr);
10592 format %{ "# cast_checked_LL $dst" %}
10593 ins_encode %{
10594 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10595 %}
10596 ins_pipe(pipe_slow);
10597 %}
10598
10599 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10600 %{
10601 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10602 match(Set dst (CastLL dst));
10603
10604 effect(KILL cr, TEMP tmp);
10605 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10606 ins_encode %{
10607 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10608 %}
10609 ins_pipe(pipe_slow);
10610 %}
10611
10612 instruct castFF(regF dst)
10613 %{
10614 match(Set dst (CastFF dst));
10615
10616 size(0);
10617 format %{ "# castFF of $dst" %}
10618 ins_encode(/* empty encoding */);
10619 ins_cost(0);
10620 ins_pipe(empty);
10621 %}
10622
10623 instruct castHH(regF dst)
10624 %{
10625 match(Set dst (CastHH dst));
10626
10627 size(0);
10628 format %{ "# castHH of $dst" %}
10629 ins_encode(/* empty encoding */);
10630 ins_cost(0);
10631 ins_pipe(empty);
10632 %}
10633
10634 instruct castDD(regD dst)
10635 %{
10636 match(Set dst (CastDD dst));
10637
10638 size(0);
10639 format %{ "# castDD of $dst" %}
10640 ins_encode(/* empty encoding */);
10641 ins_cost(0);
10642 ins_pipe(empty);
10643 %}
10644
10645 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
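// cmpxchg compares against rax implicitly, so oldval is constrained to an
// rax register class and KILLed (on failure the current memory value is left
// in rax).  The lock prefix makes the update atomic; the boolean result is
// derived from ZF via setcc.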
10646 instruct compareAndSwapP(rRegI res,
10647 memory mem_ptr,
10648 rax_RegP oldval, rRegP newval,
10649 rFlagsReg cr)
10650 %{
10651 predicate(n->as_LoadStore()->barrier_data() == 0);
10652 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10653 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10654 effect(KILL cr, KILL oldval);
10655
10656 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10657 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10658 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10659 ins_encode %{
10660 __ lock();
10661 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10662 __ setcc(Assembler::equal, $res$$Register);
10663 %}
10664 ins_pipe( pipe_cmpxchg );
10665 %}
10666
10667 instruct compareAndSwapL(rRegI res,
10668 memory mem_ptr,
10669 rax_RegL oldval, rRegL newval,
10670 rFlagsReg cr)
10671 %{
10672 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10673 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10674 effect(KILL cr, KILL oldval);
10675
10676 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10677 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10678 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10679 ins_encode %{
10680 __ lock();
10681 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10682 __ setcc(Assembler::equal, $res$$Register);
10683 %}
10684 ins_pipe( pipe_cmpxchg );
10685 %}
10686
10687 instruct compareAndSwapI(rRegI res,
10688 memory mem_ptr,
10689 rax_RegI oldval, rRegI newval,
10690 rFlagsReg cr)
10691 %{
10692 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10693 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10694 effect(KILL cr, KILL oldval);
10695
10696 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10697 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10698 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10699 ins_encode %{
10700 __ lock();
10701 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10702 __ setcc(Assembler::equal, $res$$Register);
10703 %}
10704 ins_pipe( pipe_cmpxchg );
10705 %}
10706
10707 instruct compareAndSwapB(rRegI res,
10708 memory mem_ptr,
10709 rax_RegI oldval, rRegI newval,
10710 rFlagsReg cr)
10711 %{
10712 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10713 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10714 effect(KILL cr, KILL oldval);
10715
10716 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10717 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10718 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10719 ins_encode %{
10720 __ lock();
10721 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10722 __ setcc(Assembler::equal, $res$$Register);
10723 %}
10724 ins_pipe( pipe_cmpxchg );
10725 %}
10726
10727 instruct compareAndSwapS(rRegI res,
10728 memory mem_ptr,
10729 rax_RegI oldval, rRegI newval,
10730 rFlagsReg cr)
10731 %{
10732 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10733 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10734 effect(KILL cr, KILL oldval);
10735
10736 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10737 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10738 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10739 ins_encode %{
10740 __ lock();
10741 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10742 __ setcc(Assembler::equal, $res$$Register);
10743 %}
10744 ins_pipe( pipe_cmpxchg );
10745 %}
10746
10747 instruct compareAndSwapN(rRegI res,
10748 memory mem_ptr,
10749 rax_RegN oldval, rRegN newval,
10750 rFlagsReg cr) %{
10751 predicate(n->as_LoadStore()->barrier_data() == 0);
10752 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10753 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10754 effect(KILL cr, KILL oldval);
10755
10756 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10757 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10758 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10759 ins_encode %{
10760 __ lock();
10761 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10762 __ setcc(Assembler::equal, $res$$Register);
10763 %}
10764 ins_pipe( pipe_cmpxchg );
10765 %}
10766
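// CompareAndExchange* return the previous memory value rather than a success
// flag.  cmpxchg already leaves that value in rax (oldval), so no setcc is
// needed.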
10767 instruct compareAndExchangeB(
10768 memory mem_ptr,
10769 rax_RegI oldval, rRegI newval,
10770 rFlagsReg cr)
10771 %{
10772 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10773 effect(KILL cr);
10774
10775 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10776 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10777 ins_encode %{
10778 __ lock();
10779 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10780 %}
10781 ins_pipe( pipe_cmpxchg );
10782 %}
10783
10784 instruct compareAndExchangeS(
10785 memory mem_ptr,
10786 rax_RegI oldval, rRegI newval,
10787 rFlagsReg cr)
10788 %{
10789 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10790 effect(KILL cr);
10791
10792 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10793 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10794 ins_encode %{
10795 __ lock();
10796 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10797 %}
10798 ins_pipe( pipe_cmpxchg );
10799 %}
10800
10801 instruct compareAndExchangeI(
10802 memory mem_ptr,
10803 rax_RegI oldval, rRegI newval,
10804 rFlagsReg cr)
10805 %{
10806 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10807 effect(KILL cr);
10808
10809 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10810 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10811 ins_encode %{
10812 __ lock();
10813 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10814 %}
10815 ins_pipe( pipe_cmpxchg );
10816 %}
10817
10818 instruct compareAndExchangeL(
10819 memory mem_ptr,
10820 rax_RegL oldval, rRegL newval,
10821 rFlagsReg cr)
10822 %{
10823 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10824 effect(KILL cr);
10825
10826 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10827 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10828 ins_encode %{
10829 __ lock();
10830 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10831 %}
10832 ins_pipe( pipe_cmpxchg );
10833 %}
10834
10835 instruct compareAndExchangeN(
10836 memory mem_ptr,
10837 rax_RegN oldval, rRegN newval,
10838 rFlagsReg cr) %{
10839 predicate(n->as_LoadStore()->barrier_data() == 0);
10840 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10841 effect(KILL cr);
10842
10843 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10844 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10845 ins_encode %{
10846 __ lock();
10847 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10848 %}
10849 ins_pipe( pipe_cmpxchg );
10850 %}
10851
10852 instruct compareAndExchangeP(
10853 memory mem_ptr,
10854 rax_RegP oldval, rRegP newval,
10855 rFlagsReg cr)
10856 %{
10857 predicate(n->as_LoadStore()->barrier_data() == 0);
10858 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10859 effect(KILL cr);
10860
10861 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10862 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10863 ins_encode %{
10864 __ lock();
10865 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10866 %}
10867 ins_pipe( pipe_cmpxchg );
10868 %}
10869
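// GetAndAdd*: when the old value is unused (result_not_used), a plain locked
// add/add-immediate is emitted; otherwise xadd returns the previous value in
// the register operand.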
10870 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10871 predicate(n->as_LoadStore()->result_not_used());
10872 match(Set dummy (GetAndAddB mem add));
10873 effect(KILL cr);
10874 format %{ "addb_lock $mem, $add" %}
10875 ins_encode %{
10876 __ lock();
10877 __ addb($mem$$Address, $add$$Register);
10878 %}
10879 ins_pipe(pipe_cmpxchg);
10880 %}
10881
10882 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10883 predicate(n->as_LoadStore()->result_not_used());
10884 match(Set dummy (GetAndAddB mem add));
10885 effect(KILL cr);
10886 format %{ "addb_lock $mem, $add" %}
10887 ins_encode %{
10888 __ lock();
10889 __ addb($mem$$Address, $add$$constant);
10890 %}
10891 ins_pipe(pipe_cmpxchg);
10892 %}
10893
10894 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10895 predicate(!n->as_LoadStore()->result_not_used());
10896 match(Set newval (GetAndAddB mem newval));
10897 effect(KILL cr);
10898 format %{ "xaddb_lock $mem, $newval" %}
10899 ins_encode %{
10900 __ lock();
10901 __ xaddb($mem$$Address, $newval$$Register);
10902 %}
10903 ins_pipe(pipe_cmpxchg);
10904 %}
10905
10906 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10907 predicate(n->as_LoadStore()->result_not_used());
10908 match(Set dummy (GetAndAddS mem add));
10909 effect(KILL cr);
10910 format %{ "addw_lock $mem, $add" %}
10911 ins_encode %{
10912 __ lock();
10913 __ addw($mem$$Address, $add$$Register);
10914 %}
10915 ins_pipe(pipe_cmpxchg);
10916 %}
10917
10918 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10919 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10920 match(Set dummy (GetAndAddS mem add));
10921 effect(KILL cr);
10922 format %{ "addw_lock $mem, $add" %}
10923 ins_encode %{
10924 __ lock();
10925 __ addw($mem$$Address, $add$$constant);
10926 %}
10927 ins_pipe(pipe_cmpxchg);
10928 %}
10929
10930 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10931 predicate(!n->as_LoadStore()->result_not_used());
10932 match(Set newval (GetAndAddS mem newval));
10933 effect(KILL cr);
10934 format %{ "xaddw_lock $mem, $newval" %}
10935 ins_encode %{
10936 __ lock();
10937 __ xaddw($mem$$Address, $newval$$Register);
10938 %}
10939 ins_pipe(pipe_cmpxchg);
10940 %}
10941
10942 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10943 predicate(n->as_LoadStore()->result_not_used());
10944 match(Set dummy (GetAndAddI mem add));
10945 effect(KILL cr);
10946 format %{ "addl_lock $mem, $add" %}
10947 ins_encode %{
10948 __ lock();
10949 __ addl($mem$$Address, $add$$Register);
10950 %}
10951 ins_pipe(pipe_cmpxchg);
10952 %}
10953
10954 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10955 predicate(n->as_LoadStore()->result_not_used());
10956 match(Set dummy (GetAndAddI mem add));
10957 effect(KILL cr);
10958 format %{ "addl_lock $mem, $add" %}
10959 ins_encode %{
10960 __ lock();
10961 __ addl($mem$$Address, $add$$constant);
10962 %}
10963 ins_pipe(pipe_cmpxchg);
10964 %}
10965
10966 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10967 predicate(!n->as_LoadStore()->result_not_used());
10968 match(Set newval (GetAndAddI mem newval));
10969 effect(KILL cr);
10970 format %{ "xaddl_lock $mem, $newval" %}
10971 ins_encode %{
10972 __ lock();
10973 __ xaddl($mem$$Address, $newval$$Register);
10974 %}
10975 ins_pipe(pipe_cmpxchg);
10976 %}
10977
10978 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10979 predicate(n->as_LoadStore()->result_not_used());
10980 match(Set dummy (GetAndAddL mem add));
10981 effect(KILL cr);
10982 format %{ "addq_lock $mem, $add" %}
10983 ins_encode %{
10984 __ lock();
10985 __ addq($mem$$Address, $add$$Register);
10986 %}
10987 ins_pipe(pipe_cmpxchg);
10988 %}
10989
10990 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10991 predicate(n->as_LoadStore()->result_not_used());
10992 match(Set dummy (GetAndAddL mem add));
10993 effect(KILL cr);
10994 format %{ "addq_lock $mem, $add" %}
10995 ins_encode %{
10996 __ lock();
10997 __ addq($mem$$Address, $add$$constant);
10998 %}
10999 ins_pipe(pipe_cmpxchg);
11000 %}
11001
11002 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11003 predicate(!n->as_LoadStore()->result_not_used());
11004 match(Set newval (GetAndAddL mem newval));
11005 effect(KILL cr);
11006 format %{ "xaddq_lock $mem, $newval" %}
11007 ins_encode %{
11008 __ lock();
11009 __ xaddq($mem$$Address, $newval$$Register);
11010 %}
11011 ins_pipe(pipe_cmpxchg);
11012 %}
11013
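// GetAndSet*: xchg with a memory operand is implicitly locked on x86, so no
// explicit lock prefix is needed.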
11014 instruct xchgB( memory mem, rRegI newval) %{
11015 match(Set newval (GetAndSetB mem newval));
11016 format %{ "XCHGB $newval,[$mem]" %}
11017 ins_encode %{
11018 __ xchgb($newval$$Register, $mem$$Address);
11019 %}
11020 ins_pipe( pipe_cmpxchg );
11021 %}
11022
11023 instruct xchgS( memory mem, rRegI newval) %{
11024 match(Set newval (GetAndSetS mem newval));
11025 format %{ "XCHGW $newval,[$mem]" %}
11026 ins_encode %{
11027 __ xchgw($newval$$Register, $mem$$Address);
11028 %}
11029 ins_pipe( pipe_cmpxchg );
11030 %}
11031
11032 instruct xchgI( memory mem, rRegI newval) %{
11033 match(Set newval (GetAndSetI mem newval));
11034 format %{ "XCHGL $newval,[$mem]" %}
11035 ins_encode %{
11036 __ xchgl($newval$$Register, $mem$$Address);
11037 %}
11038 ins_pipe( pipe_cmpxchg );
11039 %}
11040
11041 instruct xchgL( memory mem, rRegL newval) %{
11042 match(Set newval (GetAndSetL mem newval));
11043 format %{ "XCHGQ $newval,[$mem]" %}
11044 ins_encode %{
11045 __ xchgq($newval$$Register, $mem$$Address);
11046 %}
11047 ins_pipe( pipe_cmpxchg );
11048 %}
11049
11050 instruct xchgP( memory mem, rRegP newval) %{
11051 match(Set newval (GetAndSetP mem newval));
11052 predicate(n->as_LoadStore()->barrier_data() == 0);
11053 format %{ "XCHGQ $newval,[$mem]" %}
11054 ins_encode %{
11055 __ xchgq($newval$$Register, $mem$$Address);
11056 %}
11057 ins_pipe( pipe_cmpxchg );
11058 %}
11059
11060 instruct xchgN( memory mem, rRegN newval) %{
11061 predicate(n->as_LoadStore()->barrier_data() == 0);
11062 match(Set newval (GetAndSetN mem newval));
11063 format %{ "XCHGL $newval,[$mem]" %}
11064 ins_encode %{
11065 __ xchgl($newval$$Register, $mem$$Address);
11066 %}
11067 ins_pipe( pipe_cmpxchg );
11068 %}
11069
11070 //----------Abs Instructions-------------------------------------------
11071
11072 // Integer Absolute Instructions
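// Branchless abs: dst = 0 - src, then cmov copies src back if that result is
// negative (i.e. src was positive), yielding |src|.  MIN_VALUE maps to itself,
// matching Java semantics.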
11073 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11074 %{
11075 match(Set dst (AbsI src));
11076 effect(TEMP dst, KILL cr);
11077 format %{ "xorl $dst, $dst\t# abs int\n\t"
11078 "subl $dst, $src\n\t"
11079 "cmovll $dst, $src" %}
11080 ins_encode %{
11081 __ xorl($dst$$Register, $dst$$Register);
11082 __ subl($dst$$Register, $src$$Register);
11083 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11084 %}
11085
11086 ins_pipe(ialu_reg_reg);
11087 %}
11088
11089 // Long Absolute Instructions
11090 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11091 %{
11092 match(Set dst (AbsL src));
11093 effect(TEMP dst, KILL cr);
11094 format %{ "xorl $dst, $dst\t# abs long\n\t"
11095 "subq $dst, $src\n\t"
11096 "cmovlq $dst, $src" %}
11097 ins_encode %{
11098 __ xorl($dst$$Register, $dst$$Register);
11099 __ subq($dst$$Register, $src$$Register);
11100 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11101 %}
11102
11103 ins_pipe(ialu_reg_reg);
11104 %}
11105
11106 //----------Subtraction Instructions-------------------------------------------
11107
11108 // Integer Subtraction Instructions
11109 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11110 %{
11111 predicate(!UseAPX);
11112 match(Set dst (SubI dst src));
11113 effect(KILL cr);
11114 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11115
11116 format %{ "subl $dst, $src\t# int" %}
11117 ins_encode %{
11118 __ subl($dst$$Register, $src$$Register);
11119 %}
11120 ins_pipe(ialu_reg_reg);
11121 %}
11122
11123 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11124 %{
11125 predicate(UseAPX);
11126 match(Set dst (SubI src1 src2));
11127 effect(KILL cr);
11128 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11129
11130 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11131 ins_encode %{
11132 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11133 %}
11134 ins_pipe(ialu_reg_reg);
11135 %}
11136
11137 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11138 %{
11139 predicate(UseAPX);
11140 match(Set dst (SubI src1 src2));
11141 effect(KILL cr);
11142 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11143
11144 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11145 ins_encode %{
11146 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11147 %}
11148 ins_pipe(ialu_reg_reg);
11149 %}
11150
11151 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11152 %{
11153 predicate(UseAPX);
11154 match(Set dst (SubI (LoadI src1) src2));
11155 effect(KILL cr);
11156 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11157
11158 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11159 ins_encode %{
11160 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11161 %}
11162 ins_pipe(ialu_reg_reg);
11163 %}
11164
11165 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11166 %{
11167 predicate(!UseAPX);
11168 match(Set dst (SubI dst (LoadI src)));
11169 effect(KILL cr);
11170 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11171
11172 ins_cost(150);
11173 format %{ "subl $dst, $src\t# int" %}
11174 ins_encode %{
11175 __ subl($dst$$Register, $src$$Address);
11176 %}
11177 ins_pipe(ialu_reg_mem);
11178 %}
11179
11180 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11181 %{
11182 predicate(UseAPX);
11183 match(Set dst (SubI src1 (LoadI src2)));
11184 effect(KILL cr);
11185 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11186
11187 ins_cost(150);
11188 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11189 ins_encode %{
11190 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11191 %}
11192 ins_pipe(ialu_reg_mem);
11193 %}
11194
11195 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11196 %{
11197 predicate(UseAPX);
11198 match(Set dst (SubI (LoadI src1) src2));
11199 effect(KILL cr);
11200 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11201
11202 ins_cost(150);
11203 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11204 ins_encode %{
11205 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11206 %}
11207 ins_pipe(ialu_reg_mem);
11208 %}
11209
11210 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11211 %{
11212 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11213 effect(KILL cr);
11214 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11215
11216 ins_cost(150);
11217 format %{ "subl $dst, $src\t# int" %}
11218 ins_encode %{
11219 __ subl($dst$$Address, $src$$Register);
11220 %}
11221 ins_pipe(ialu_mem_reg);
11222 %}
11223
11224 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11225 %{
11226 predicate(!UseAPX);
11227 match(Set dst (SubL dst src));
11228 effect(KILL cr);
11229 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11230
11231 format %{ "subq $dst, $src\t# long" %}
11232 ins_encode %{
11233 __ subq($dst$$Register, $src$$Register);
11234 %}
11235 ins_pipe(ialu_reg_reg);
11236 %}
11237
11238 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11239 %{
11240 predicate(UseAPX);
11241 match(Set dst (SubL src1 src2));
11242 effect(KILL cr);
11243 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11244
11245 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11246 ins_encode %{
11247 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11248 %}
11249 ins_pipe(ialu_reg_reg);
11250 %}
11251
11252 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11253 %{
11254 predicate(UseAPX);
11255 match(Set dst (SubL src1 src2));
11256 effect(KILL cr);
11257 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11258
11259 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11260 ins_encode %{
11261 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11262 %}
11263 ins_pipe(ialu_reg_reg);
11264 %}
11265
11266 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11267 %{
11268 predicate(UseAPX);
11269 match(Set dst (SubL (LoadL src1) src2));
11270 effect(KILL cr);
11271 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11272
11273 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11274 ins_encode %{
11275 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11276 %}
11277 ins_pipe(ialu_reg_reg);
11278 %}
11279
11280 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11281 %{
11282 predicate(!UseAPX);
11283 match(Set dst (SubL dst (LoadL src)));
11284 effect(KILL cr);
11285 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11286
11287 ins_cost(150);
11288 format %{ "subq $dst, $src\t# long" %}
11289 ins_encode %{
11290 __ subq($dst$$Register, $src$$Address);
11291 %}
11292 ins_pipe(ialu_reg_mem);
11293 %}
11294
11295 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11296 %{
11297 predicate(UseAPX);
11298 match(Set dst (SubL src1 (LoadL src2)));
11299 effect(KILL cr);
11300 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11301
11302 ins_cost(150);
11303 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11304 ins_encode %{
11305 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11306 %}
11307 ins_pipe(ialu_reg_mem);
11308 %}
11309
11310 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11311 %{
11312 predicate(UseAPX);
11313 match(Set dst (SubL (LoadL src1) src2));
11314 effect(KILL cr);
11315 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11316
11317 ins_cost(150);
11318 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11319 ins_encode %{
11320 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11321 %}
11322 ins_pipe(ialu_reg_mem);
11323 %}
11324
11325 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11326 %{
11327 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11328 effect(KILL cr);
11329 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11330
11331 ins_cost(150);
11332 format %{ "subq $dst, $src\t# long" %}
11333 ins_encode %{
11334 __ subq($dst$$Address, $src$$Register);
11335 %}
11336 ins_pipe(ialu_mem_reg);
11337 %}
11338
11339 // Subtract from a pointer
11340 // XXX hmpf???
11341 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11342 %{
11343 match(Set dst (AddP dst (SubI zero src)));
11344 effect(KILL cr);
11345
11346 format %{ "subq $dst, $src\t# ptr - int" %}
11347 ins_encode %{
11348 __ subq($dst$$Register, $src$$Register);
11349 %}
11350 ins_pipe(ialu_reg_reg);
11351 %}
11352
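// Negation arrives either as SubI/SubL with a zero left operand or as explicit
// NegI/NegL nodes; all forms lower to neg (legacy) or eneg (APX NDD).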
11353 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11354 %{
11355 predicate(!UseAPX);
11356 match(Set dst (SubI zero dst));
11357 effect(KILL cr);
11358 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11359
11360 format %{ "negl $dst\t# int" %}
11361 ins_encode %{
11362 __ negl($dst$$Register);
11363 %}
11364 ins_pipe(ialu_reg);
11365 %}
11366
11367 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11368 %{
11369 predicate(UseAPX);
11370 match(Set dst (SubI zero src));
11371 effect(KILL cr);
11372 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11373
11374 format %{ "enegl $dst, $src\t# int ndd" %}
11375 ins_encode %{
11376 __ enegl($dst$$Register, $src$$Register, false);
11377 %}
11378 ins_pipe(ialu_reg);
11379 %}
11380
11381 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11382 %{
11383 predicate(!UseAPX);
11384 match(Set dst (NegI dst));
11385 effect(KILL cr);
11386 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11387
11388 format %{ "negl $dst\t# int" %}
11389 ins_encode %{
11390 __ negl($dst$$Register);
11391 %}
11392 ins_pipe(ialu_reg);
11393 %}
11394
11395 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11396 %{
11397 predicate(UseAPX);
11398 match(Set dst (NegI src));
11399 effect(KILL cr);
11400 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11401
11402 format %{ "enegl $dst, $src\t# int ndd" %}
11403 ins_encode %{
11404 __ enegl($dst$$Register, $src$$Register, false);
11405 %}
11406 ins_pipe(ialu_reg);
11407 %}
11408
11409 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11410 %{
11411 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11412 effect(KILL cr);
11413 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11414
11415 format %{ "negl $dst\t# int" %}
11416 ins_encode %{
11417 __ negl($dst$$Address);
11418 %}
11419 ins_pipe(ialu_reg);
11420 %}
11421
11422 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11423 %{
11424 predicate(!UseAPX);
11425 match(Set dst (SubL zero dst));
11426 effect(KILL cr);
11427 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11428
11429 format %{ "negq $dst\t# long" %}
11430 ins_encode %{
11431 __ negq($dst$$Register);
11432 %}
11433 ins_pipe(ialu_reg);
11434 %}
11435
11436 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11437 %{
11438 predicate(UseAPX);
11439 match(Set dst (SubL zero src));
11440 effect(KILL cr);
11441 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11442
11443 format %{ "enegq $dst, $src\t# long ndd" %}
11444 ins_encode %{
11445 __ enegq($dst$$Register, $src$$Register, false);
11446 %}
11447 ins_pipe(ialu_reg);
11448 %}
11449
11450 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11451 %{
11452 predicate(!UseAPX);
11453 match(Set dst (NegL dst));
11454 effect(KILL cr);
11455 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11456
11457 format %{ "negq $dst\t# long" %}
11458 ins_encode %{
11459 __ negq($dst$$Register);
11460 %}
11461 ins_pipe(ialu_reg);
11462 %}
11463
11464 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11465 %{
11466 predicate(UseAPX);
11467 match(Set dst (NegL src));
11468 effect(KILL cr);
11469 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11470
11471 format %{ "enegq $dst, $src\t# long ndd" %}
11472 ins_encode %{
11473 __ enegq($dst$$Register, $src$$Register, false);
11474 %}
11475 ins_pipe(ialu_reg);
11476 %}
11477
11478 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11479 %{
11480 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11481 effect(KILL cr);
11482 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11483
11484 format %{ "negq $dst\t# long" %}
11485 ins_encode %{
11486 __ negq($dst$$Address);
11487 %}
11488 ins_pipe(ialu_reg);
11489 %}
11490
11491 //----------Multiplication/Division Instructions-------------------------------
11492 // Integer Multiplication Instructions
11493 // Multiply Register
11494
11495 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11496 %{
11497 predicate(!UseAPX);
11498 match(Set dst (MulI dst src));
11499 effect(KILL cr);
11500
11501 ins_cost(300);
11502 format %{ "imull $dst, $src\t# int" %}
11503 ins_encode %{
11504 __ imull($dst$$Register, $src$$Register);
11505 %}
11506 ins_pipe(ialu_reg_reg_alu0);
11507 %}
11508
11509 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11510 %{
11511 predicate(UseAPX);
11512 match(Set dst (MulI src1 src2));
11513 effect(KILL cr);
11514 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11515
11516 ins_cost(300);
11517 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11518 ins_encode %{
11519 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11520 %}
11521 ins_pipe(ialu_reg_reg_alu0);
11522 %}
11523
11524 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11525 %{
11526 match(Set dst (MulI src imm));
11527 effect(KILL cr);
11528
11529 ins_cost(300);
11530 format %{ "imull $dst, $src, $imm\t# int" %}
11531 ins_encode %{
11532 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11533 %}
11534 ins_pipe(ialu_reg_reg_alu0);
11535 %}
11536
11537 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11538 %{
11539 predicate(!UseAPX);
11540 match(Set dst (MulI dst (LoadI src)));
11541 effect(KILL cr);
11542
11543 ins_cost(350);
11544 format %{ "imull $dst, $src\t# int" %}
11545 ins_encode %{
11546 __ imull($dst$$Register, $src$$Address);
11547 %}
11548 ins_pipe(ialu_reg_mem_alu0);
11549 %}
11550
11551 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11552 %{
11553 predicate(UseAPX);
11554 match(Set dst (MulI src1 (LoadI src2)));
11555 effect(KILL cr);
11556 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11557
11558 ins_cost(350);
11559 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11560 ins_encode %{
11561 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11562 %}
11563 ins_pipe(ialu_reg_mem_alu0);
11564 %}
11565
11566 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11567 %{
11568 match(Set dst (MulI (LoadI src) imm));
11569 effect(KILL cr);
11570
11571 ins_cost(300);
11572 format %{ "imull $dst, $src, $imm\t# int" %}
11573 ins_encode %{
11574 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11575 %}
11576 ins_pipe(ialu_reg_mem_alu0);
11577 %}
11578
11579 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11580 %{
11581 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11582 effect(KILL cr, KILL src2);
11583
11584 expand %{ mulI_rReg(dst, src1, cr);
11585 mulI_rReg(src2, src3, cr);
11586 addI_rReg(dst, src2, cr); %}
11587 %}
11588
11589 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11590 %{
11591 predicate(!UseAPX);
11592 match(Set dst (MulL dst src));
11593 effect(KILL cr);
11594
11595 ins_cost(300);
11596 format %{ "imulq $dst, $src\t# long" %}
11597 ins_encode %{
11598 __ imulq($dst$$Register, $src$$Register);
11599 %}
11600 ins_pipe(ialu_reg_reg_alu0);
11601 %}
11602
11603 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11604 %{
11605 predicate(UseAPX);
11606 match(Set dst (MulL src1 src2));
11607 effect(KILL cr);
11608 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11609
11610 ins_cost(300);
11611 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11612 ins_encode %{
11613 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11614 %}
11615 ins_pipe(ialu_reg_reg_alu0);
11616 %}
11617
11618 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11619 %{
11620 match(Set dst (MulL src imm));
11621 effect(KILL cr);
11622
11623 ins_cost(300);
11624 format %{ "imulq $dst, $src, $imm\t# long" %}
11625 ins_encode %{
11626 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11627 %}
11628 ins_pipe(ialu_reg_reg_alu0);
11629 %}
11630
11631 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11632 %{
11633 predicate(!UseAPX);
11634 match(Set dst (MulL dst (LoadL src)));
11635 effect(KILL cr);
11636
11637 ins_cost(350);
11638 format %{ "imulq $dst, $src\t# long" %}
11639 ins_encode %{
11640 __ imulq($dst$$Register, $src$$Address);
11641 %}
11642 ins_pipe(ialu_reg_mem_alu0);
11643 %}
11644
11645 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11646 %{
11647 predicate(UseAPX);
11648 match(Set dst (MulL src1 (LoadL src2)));
11649 effect(KILL cr);
11650 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11651
11652 ins_cost(350);
11653 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11654 ins_encode %{
11655 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11656 %}
11657 ins_pipe(ialu_reg_mem_alu0);
11658 %}
11659
11660 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11661 %{
11662 match(Set dst (MulL (LoadL src) imm));
11663 effect(KILL cr);
11664
11665 ins_cost(300);
11666 format %{ "imulq $dst, $src, $imm\t# long" %}
11667 ins_encode %{
11668 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11669 %}
11670 ins_pipe(ialu_reg_mem_alu0);
11671 %}
11672
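// One-operand imulq/mulq leave the 128-bit product in rdx:rax; these high-half
// multiplies consume rax and keep only rdx as the result.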
11673 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11674 %{
11675 match(Set dst (MulHiL src rax));
11676 effect(USE_KILL rax, KILL cr);
11677
11678 ins_cost(300);
11679 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11680 ins_encode %{
11681 __ imulq($src$$Register);
11682 %}
11683 ins_pipe(ialu_reg_reg_alu0);
11684 %}
11685
11686 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11687 %{
11688 match(Set dst (UMulHiL src rax));
11689 effect(USE_KILL rax, KILL cr);
11690
11691 ins_cost(300);
11692 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11693 ins_encode %{
11694 __ mulq($src$$Register);
11695 %}
11696 ins_pipe(ialu_reg_reg_alu0);
11697 %}
11698
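// idiv raises #DE when the quotient overflows (MIN_VALUE / -1), so the
// expansion tests for that operand pair first and short-circuits with
// quotient = MIN_VALUE (already in rax) and remainder = 0 (rdx cleared).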
11699 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11700 rFlagsReg cr)
11701 %{
11702 match(Set rax (DivI rax div));
11703 effect(KILL rdx, KILL cr);
11704
11705 ins_cost(30*100+10*100); // XXX
11706 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11707 "jne,s normal\n\t"
11708 "xorl rdx, rdx\n\t"
11709 "cmpl $div, -1\n\t"
11710 "je,s done\n"
11711 "normal: cdql\n\t"
11712 "idivl $div\n"
11713 "done:" %}
11714 ins_encode(cdql_enc(div));
11715 ins_pipe(ialu_reg_reg_alu0);
11716 %}
11717
11718 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11719 rFlagsReg cr)
11720 %{
11721 match(Set rax (DivL rax div));
11722 effect(KILL rdx, KILL cr);
11723
11724 ins_cost(30*100+10*100); // XXX
11725 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11726 "cmpq rax, rdx\n\t"
11727 "jne,s normal\n\t"
11728 "xorl rdx, rdx\n\t"
11729 "cmpq $div, -1\n\t"
11730 "je,s done\n"
11731 "normal: cdqq\n\t"
11732 "idivq $div\n"
11733 "done:" %}
11734 ins_encode(cdqq_enc(div));
11735 ins_pipe(ialu_reg_reg_alu0);
11736 %}
11737
11738 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11739 %{
11740 match(Set rax (UDivI rax div));
11741 effect(KILL rdx, KILL cr);
11742
11743 ins_cost(300);
11744 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11745 ins_encode %{
11746 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11747 %}
11748 ins_pipe(ialu_reg_reg_alu0);
11749 %}
11750
11751 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11752 %{
11753 match(Set rax (UDivL rax div));
11754 effect(KILL rdx, KILL cr);
11755
11756 ins_cost(300);
11757 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11758 ins_encode %{
11759 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11760 %}
11761 ins_pipe(ialu_reg_reg_alu0);
11762 %}
11763
11764 // Integer DIVMOD with Register, both quotient and mod results
11765 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11766 rFlagsReg cr)
11767 %{
11768 match(DivModI rax div);
11769 effect(KILL cr);
11770
11771 ins_cost(30*100+10*100); // XXX
11772 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11773 "jne,s normal\n\t"
11774 "xorl rdx, rdx\n\t"
11775 "cmpl $div, -1\n\t"
11776 "je,s done\n"
11777 "normal: cdql\n\t"
11778 "idivl $div\n"
11779 "done:" %}
11780 ins_encode(cdql_enc(div));
11781 ins_pipe(pipe_slow);
11782 %}
11783
11784 // Long DIVMOD with Register, both quotient and mod results
11785 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11786 rFlagsReg cr)
11787 %{
11788 match(DivModL rax div);
11789 effect(KILL cr);
11790
11791 ins_cost(30*100+10*100); // XXX
11792 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11793 "cmpq rax, rdx\n\t"
11794 "jne,s normal\n\t"
11795 "xorl rdx, rdx\n\t"
11796 "cmpq $div, -1\n\t"
11797 "je,s done\n"
11798 "normal: cdqq\n\t"
11799 "idivq $div\n"
11800 "done:" %}
11801 ins_encode(cdqq_enc(div));
11802 ins_pipe(pipe_slow);
11803 %}
11804
11805 // Unsigned integer DIVMOD with Register, both quotient and mod results
11806 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11807 no_rax_rdx_RegI div, rFlagsReg cr)
11808 %{
11809 match(UDivModI rax div);
11810 effect(TEMP tmp, KILL cr);
11811
11812 ins_cost(300);
11813 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11814 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11815 %}
11816 ins_encode %{
11817 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11818 %}
11819 ins_pipe(pipe_slow);
11820 %}
11821
11822 // Unsigned long DIVMOD with Register, both quotient and mod results
11823 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11824 no_rax_rdx_RegL div, rFlagsReg cr)
11825 %{
11826 match(UDivModL rax div);
11827 effect(TEMP tmp, KILL cr);
11828
11829 ins_cost(300);
11830 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11831 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11832 %}
11833 ins_encode %{
11834 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11835 %}
11836 ins_pipe(pipe_slow);
11837 %}
11838
11839 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11840 rFlagsReg cr)
11841 %{
11842 match(Set rdx (ModI rax div));
11843 effect(KILL rax, KILL cr);
11844
11845 ins_cost(300); // XXX
11846 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11847 "jne,s normal\n\t"
11848 "xorl rdx, rdx\n\t"
11849 "cmpl $div, -1\n\t"
11850 "je,s done\n"
11851 "normal: cdql\n\t"
11852 "idivl $div\n"
11853 "done:" %}
11854 ins_encode(cdql_enc(div));
11855 ins_pipe(ialu_reg_reg_alu0);
11856 %}
11857
11858 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11859 rFlagsReg cr)
11860 %{
11861 match(Set rdx (ModL rax div));
11862 effect(KILL rax, KILL cr);
11863
11864 ins_cost(300); // XXX
11865 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11866 "cmpq rax, rdx\n\t"
11867 "jne,s normal\n\t"
11868 "xorl rdx, rdx\n\t"
11869 "cmpq $div, -1\n\t"
11870 "je,s done\n"
11871 "normal: cdqq\n\t"
11872 "idivq $div\n"
11873 "done:" %}
11874 ins_encode(cdqq_enc(div));
11875 ins_pipe(ialu_reg_reg_alu0);
11876 %}
11877
11878 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11879 %{
11880 match(Set rdx (UModI rax div));
11881 effect(KILL rax, KILL cr);
11882
11883 ins_cost(300);
11884 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11885 ins_encode %{
11886 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11887 %}
11888 ins_pipe(ialu_reg_reg_alu0);
11889 %}
11890
11891 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11892 %{
11893 match(Set rdx (UModL rax div));
11894 effect(KILL rax, KILL cr);
11895
11896 ins_cost(300);
11897 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11898 ins_encode %{
11899 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11900 %}
11901 ins_pipe(ialu_reg_reg_alu0);
11902 %}
11903
11904 // Integer Shift Instructions
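// Variable shifts use the CL-implicit legacy forms only without BMI2; with
// BMI2, shlx/sarx/shrx take the count in any register and do not modify the
// flags (hence no KILL cr).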
11905 // Shift Left by one, two, three
11906 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11907 %{
11908 predicate(!UseAPX);
11909 match(Set dst (LShiftI dst shift));
11910 effect(KILL cr);
11911
11912 format %{ "sall $dst, $shift" %}
11913 ins_encode %{
11914 __ sall($dst$$Register, $shift$$constant);
11915 %}
11916 ins_pipe(ialu_reg);
11917 %}
11918
11919 // Shift Left by one, two, three
11920 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11921 %{
11922 predicate(UseAPX);
11923 match(Set dst (LShiftI src shift));
11924 effect(KILL cr);
11925 flag(PD::Flag_ndd_demotable_opr1);
11926
11927 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11928 ins_encode %{
11929 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11930 %}
11931 ins_pipe(ialu_reg);
11932 %}
11933
11934 // Shift Left by 8-bit immediate
11935 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11936 %{
11937 predicate(!UseAPX);
11938 match(Set dst (LShiftI dst shift));
11939 effect(KILL cr);
11940
11941 format %{ "sall $dst, $shift" %}
11942 ins_encode %{
11943 __ sall($dst$$Register, $shift$$constant);
11944 %}
11945 ins_pipe(ialu_reg);
11946 %}
11947
11948 // Shift Left by 8-bit immediate
11949 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11950 %{
11951 predicate(UseAPX);
11952 match(Set dst (LShiftI src shift));
11953 effect(KILL cr);
11954 flag(PD::Flag_ndd_demotable_opr1);
11955
11956 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11957 ins_encode %{
11958 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11959 %}
11960 ins_pipe(ialu_reg);
11961 %}
11962
11963 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11964 %{
11965 predicate(UseAPX);
11966 match(Set dst (LShiftI (LoadI src) shift));
11967 effect(KILL cr);
11968
11969 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11970 ins_encode %{
11971 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11972 %}
11973 ins_pipe(ialu_reg);
11974 %}
11975
11976 // Shift Left by 8-bit immediate
11977 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11978 %{
11979 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11980 effect(KILL cr);
11981
11982 format %{ "sall $dst, $shift" %}
11983 ins_encode %{
11984 __ sall($dst$$Address, $shift$$constant);
11985 %}
11986 ins_pipe(ialu_mem_imm);
11987 %}
11988
11989 // Shift Left by variable
11990 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11991 %{
11992 predicate(!VM_Version::supports_bmi2());
11993 match(Set dst (LShiftI dst shift));
11994 effect(KILL cr);
11995
11996 format %{ "sall $dst, $shift" %}
11997 ins_encode %{
11998 __ sall($dst$$Register);
11999 %}
12000 ins_pipe(ialu_reg_reg);
12001 %}
12002
12003 // Shift Left by variable
12004 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12005 %{
12006 predicate(!VM_Version::supports_bmi2());
12007 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12008 effect(KILL cr);
12009
12010 format %{ "sall $dst, $shift" %}
12011 ins_encode %{
12012 __ sall($dst$$Address);
12013 %}
12014 ins_pipe(ialu_mem_reg);
12015 %}
12016
12017 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12018 %{
12019 predicate(VM_Version::supports_bmi2());
12020 match(Set dst (LShiftI src shift));
12021
12022 format %{ "shlxl $dst, $src, $shift" %}
12023 ins_encode %{
12024 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12025 %}
12026 ins_pipe(ialu_reg_reg);
12027 %}
12028
12029 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12030 %{
12031 predicate(VM_Version::supports_bmi2());
12032 match(Set dst (LShiftI (LoadI src) shift));
12033 ins_cost(175);
12034 format %{ "shlxl $dst, $src, $shift" %}
12035 ins_encode %{
12036 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12037 %}
12038 ins_pipe(ialu_reg_mem);
12039 %}
12040
12041 // Arithmetic Shift Right by 8-bit immediate
12042 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12043 %{
12044 predicate(!UseAPX);
12045 match(Set dst (RShiftI dst shift));
12046 effect(KILL cr);
12047
12048 format %{ "sarl $dst, $shift" %}
12049 ins_encode %{
12050 __ sarl($dst$$Register, $shift$$constant);
12051 %}
12052 ins_pipe(ialu_mem_imm);
12053 %}
12054
12055 // Arithmetic Shift Right by 8-bit immediate
12056 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12057 %{
12058 predicate(UseAPX);
12059 match(Set dst (RShiftI src shift));
12060 effect(KILL cr);
12061 flag(PD::Flag_ndd_demotable_opr1);
12062
12063 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12064 ins_encode %{
12065 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12066 %}
12067 ins_pipe(ialu_mem_imm);
12068 %}
12069
12070 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12071 %{
12072 predicate(UseAPX);
12073 match(Set dst (RShiftI (LoadI src) shift));
12074 effect(KILL cr);
12075
12076 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12077 ins_encode %{
12078 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12079 %}
12080 ins_pipe(ialu_mem_imm);
12081 %}
12082
12083 // Arithmetic Shift Right by 8-bit immediate
12084 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12085 %{
12086 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12087 effect(KILL cr);
12088
12089 format %{ "sarl $dst, $shift" %}
12090 ins_encode %{
12091 __ sarl($dst$$Address, $shift$$constant);
12092 %}
12093 ins_pipe(ialu_mem_imm);
12094 %}
12095
12096 // Arithmetic Shift Right by variable
12097 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12098 %{
12099 predicate(!VM_Version::supports_bmi2());
12100 match(Set dst (RShiftI dst shift));
12101 effect(KILL cr);
12102
12103 format %{ "sarl $dst, $shift" %}
12104 ins_encode %{
12105 __ sarl($dst$$Register);
12106 %}
12107 ins_pipe(ialu_reg_reg);
12108 %}
12109
12110 // Arithmetic Shift Right by variable
12111 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12112 %{
12113 predicate(!VM_Version::supports_bmi2());
12114 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12115 effect(KILL cr);
12116
12117 format %{ "sarl $dst, $shift" %}
12118 ins_encode %{
12119 __ sarl($dst$$Address);
12120 %}
12121 ins_pipe(ialu_mem_reg);
12122 %}
12123
12124 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12125 %{
12126 predicate(VM_Version::supports_bmi2());
12127 match(Set dst (RShiftI src shift));
12128
12129 format %{ "sarxl $dst, $src, $shift" %}
12130 ins_encode %{
12131 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12132 %}
12133 ins_pipe(ialu_reg_reg);
12134 %}
12135
12136 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12137 %{
12138 predicate(VM_Version::supports_bmi2());
12139 match(Set dst (RShiftI (LoadI src) shift));
12140 ins_cost(175);
12141 format %{ "sarxl $dst, $src, $shift" %}
12142 ins_encode %{
12143 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12144 %}
12145 ins_pipe(ialu_reg_mem);
12146 %}
12147
12148 // Logical Shift Right by 8-bit immediate
12149 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12150 %{
12151 predicate(!UseAPX);
12152 match(Set dst (URShiftI dst shift));
12153 effect(KILL cr);
12154
12155 format %{ "shrl $dst, $shift" %}
12156 ins_encode %{
12157 __ shrl($dst$$Register, $shift$$constant);
12158 %}
12159 ins_pipe(ialu_reg);
12160 %}
12161
12162 // Logical Shift Right by 8-bit immediate
12163 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12164 %{
12165 predicate(UseAPX);
12166 match(Set dst (URShiftI src shift));
12167 effect(KILL cr);
12168 flag(PD::Flag_ndd_demotable_opr1);
12169
  format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12171 ins_encode %{
12172 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12173 %}
12174 ins_pipe(ialu_reg);
12175 %}
12176
12177 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12178 %{
12179 predicate(UseAPX);
12180 match(Set dst (URShiftI (LoadI src) shift));
12181 effect(KILL cr);
12182
  format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12184 ins_encode %{
12185 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12186 %}
12187 ins_pipe(ialu_reg);
12188 %}
12189
12190 // Logical Shift Right by 8-bit immediate
12191 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12192 %{
12193 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12194 effect(KILL cr);
12195
12196 format %{ "shrl $dst, $shift" %}
12197 ins_encode %{
12198 __ shrl($dst$$Address, $shift$$constant);
12199 %}
12200 ins_pipe(ialu_mem_imm);
12201 %}
12202
12203 // Logical Shift Right by variable
12204 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12205 %{
12206 predicate(!VM_Version::supports_bmi2());
12207 match(Set dst (URShiftI dst shift));
12208 effect(KILL cr);
12209
12210 format %{ "shrl $dst, $shift" %}
12211 ins_encode %{
12212 __ shrl($dst$$Register);
12213 %}
12214 ins_pipe(ialu_reg_reg);
12215 %}
12216
12217 // Logical Shift Right by variable
12218 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12219 %{
12220 predicate(!VM_Version::supports_bmi2());
12221 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12222 effect(KILL cr);
12223
12224 format %{ "shrl $dst, $shift" %}
12225 ins_encode %{
12226 __ shrl($dst$$Address);
12227 %}
12228 ins_pipe(ialu_mem_reg);
12229 %}
12230
12231 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12232 %{
12233 predicate(VM_Version::supports_bmi2());
12234 match(Set dst (URShiftI src shift));
12235
12236 format %{ "shrxl $dst, $src, $shift" %}
12237 ins_encode %{
12238 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12239 %}
12240 ins_pipe(ialu_reg_reg);
12241 %}
12242
12243 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12244 %{
12245 predicate(VM_Version::supports_bmi2());
12246 match(Set dst (URShiftI (LoadI src) shift));
12247 ins_cost(175);
12248 format %{ "shrxl $dst, $src, $shift" %}
12249 ins_encode %{
12250 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12251 %}
12252 ins_pipe(ialu_reg_mem);
12253 %}
12254
12255 // Long Shift Instructions
12256 // Shift Left by one, two, three
12257 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12258 %{
12259 predicate(!UseAPX);
12260 match(Set dst (LShiftL dst shift));
12261 effect(KILL cr);
12262
12263 format %{ "salq $dst, $shift" %}
12264 ins_encode %{
12265 __ salq($dst$$Register, $shift$$constant);
12266 %}
12267 ins_pipe(ialu_reg);
12268 %}
12269
12270 // Shift Left by one, two, three
12271 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12272 %{
12273 predicate(UseAPX);
12274 match(Set dst (LShiftL src shift));
12275 effect(KILL cr);
12276 flag(PD::Flag_ndd_demotable_opr1);
12277
12278 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12279 ins_encode %{
12280 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12281 %}
12282 ins_pipe(ialu_reg);
12283 %}
12284
12285 // Shift Left by 8-bit immediate
12286 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12287 %{
12288 predicate(!UseAPX);
12289 match(Set dst (LShiftL dst shift));
12290 effect(KILL cr);
12291
12292 format %{ "salq $dst, $shift" %}
12293 ins_encode %{
12294 __ salq($dst$$Register, $shift$$constant);
12295 %}
12296 ins_pipe(ialu_reg);
12297 %}
12298
12299 // Shift Left by 8-bit immediate
12300 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12301 %{
12302 predicate(UseAPX);
12303 match(Set dst (LShiftL src shift));
12304 effect(KILL cr);
12305 flag(PD::Flag_ndd_demotable_opr1);
12306
12307 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12308 ins_encode %{
12309 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12310 %}
12311 ins_pipe(ialu_reg);
12312 %}
12313
12314 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12315 %{
12316 predicate(UseAPX);
12317 match(Set dst (LShiftL (LoadL src) shift));
12318 effect(KILL cr);
12319
12320 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12321 ins_encode %{
12322 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12323 %}
12324 ins_pipe(ialu_reg);
12325 %}
12326
12327 // Shift Left by 8-bit immediate
12328 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12329 %{
12330 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12331 effect(KILL cr);
12332
12333 format %{ "salq $dst, $shift" %}
12334 ins_encode %{
12335 __ salq($dst$$Address, $shift$$constant);
12336 %}
12337 ins_pipe(ialu_mem_imm);
12338 %}
12339
12340 // Shift Left by variable
12341 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12342 %{
12343 predicate(!VM_Version::supports_bmi2());
12344 match(Set dst (LShiftL dst shift));
12345 effect(KILL cr);
12346
12347 format %{ "salq $dst, $shift" %}
12348 ins_encode %{
12349 __ salq($dst$$Register);
12350 %}
12351 ins_pipe(ialu_reg_reg);
12352 %}
12353
12354 // Shift Left by variable
12355 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12356 %{
12357 predicate(!VM_Version::supports_bmi2());
12358 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12359 effect(KILL cr);
12360
12361 format %{ "salq $dst, $shift" %}
12362 ins_encode %{
12363 __ salq($dst$$Address);
12364 %}
12365 ins_pipe(ialu_mem_reg);
12366 %}
12367
12368 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12369 %{
12370 predicate(VM_Version::supports_bmi2());
12371 match(Set dst (LShiftL src shift));
12372
12373 format %{ "shlxq $dst, $src, $shift" %}
12374 ins_encode %{
12375 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12376 %}
12377 ins_pipe(ialu_reg_reg);
12378 %}
12379
12380 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12381 %{
12382 predicate(VM_Version::supports_bmi2());
12383 match(Set dst (LShiftL (LoadL src) shift));
12384 ins_cost(175);
12385 format %{ "shlxq $dst, $src, $shift" %}
12386 ins_encode %{
12387 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12388 %}
12389 ins_pipe(ialu_reg_mem);
12390 %}
12391
12392 // Arithmetic Shift Right by 8-bit immediate
12393 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12394 %{
12395 predicate(!UseAPX);
12396 match(Set dst (RShiftL dst shift));
12397 effect(KILL cr);
12398
12399 format %{ "sarq $dst, $shift" %}
12400 ins_encode %{
12401 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12402 %}
12403 ins_pipe(ialu_mem_imm);
12404 %}
12405
12406 // Arithmetic Shift Right by 8-bit immediate
12407 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12408 %{
12409 predicate(UseAPX);
12410 match(Set dst (RShiftL src shift));
12411 effect(KILL cr);
12412 flag(PD::Flag_ndd_demotable_opr1);
12413
12414 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12415 ins_encode %{
12416 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12417 %}
12418 ins_pipe(ialu_mem_imm);
12419 %}
12420
12421 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12422 %{
12423 predicate(UseAPX);
12424 match(Set dst (RShiftL (LoadL src) shift));
12425 effect(KILL cr);
12426
12427 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12428 ins_encode %{
12429 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12430 %}
12431 ins_pipe(ialu_mem_imm);
12432 %}
12433
12434 // Arithmetic Shift Right by 8-bit immediate
12435 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12436 %{
12437 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12438 effect(KILL cr);
12439
12440 format %{ "sarq $dst, $shift" %}
12441 ins_encode %{
12442 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12443 %}
12444 ins_pipe(ialu_mem_imm);
12445 %}
12446
12447 // Arithmetic Shift Right by variable
12448 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12449 %{
12450 predicate(!VM_Version::supports_bmi2());
12451 match(Set dst (RShiftL dst shift));
12452 effect(KILL cr);
12453
12454 format %{ "sarq $dst, $shift" %}
12455 ins_encode %{
12456 __ sarq($dst$$Register);
12457 %}
12458 ins_pipe(ialu_reg_reg);
12459 %}
12460
12461 // Arithmetic Shift Right by variable
12462 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12463 %{
12464 predicate(!VM_Version::supports_bmi2());
12465 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12466 effect(KILL cr);
12467
12468 format %{ "sarq $dst, $shift" %}
12469 ins_encode %{
12470 __ sarq($dst$$Address);
12471 %}
12472 ins_pipe(ialu_mem_reg);
12473 %}
12474
12475 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12476 %{
12477 predicate(VM_Version::supports_bmi2());
12478 match(Set dst (RShiftL src shift));
12479
12480 format %{ "sarxq $dst, $src, $shift" %}
12481 ins_encode %{
12482 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12483 %}
12484 ins_pipe(ialu_reg_reg);
12485 %}
12486
12487 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12488 %{
12489 predicate(VM_Version::supports_bmi2());
12490 match(Set dst (RShiftL (LoadL src) shift));
12491 ins_cost(175);
12492 format %{ "sarxq $dst, $src, $shift" %}
12493 ins_encode %{
12494 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12495 %}
12496 ins_pipe(ialu_reg_mem);
12497 %}
12498
12499 // Logical Shift Right by 8-bit immediate
12500 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12501 %{
12502 predicate(!UseAPX);
12503 match(Set dst (URShiftL dst shift));
12504 effect(KILL cr);
12505
12506 format %{ "shrq $dst, $shift" %}
12507 ins_encode %{
12508 __ shrq($dst$$Register, $shift$$constant);
12509 %}
12510 ins_pipe(ialu_reg);
12511 %}
12512
12513 // Logical Shift Right by 8-bit immediate
12514 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12515 %{
12516 predicate(UseAPX);
12517 match(Set dst (URShiftL src shift));
12518 effect(KILL cr);
12519 flag(PD::Flag_ndd_demotable_opr1);
12520
12521 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12522 ins_encode %{
12523 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12524 %}
12525 ins_pipe(ialu_reg);
12526 %}
12527
12528 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12529 %{
12530 predicate(UseAPX);
12531 match(Set dst (URShiftL (LoadL src) shift));
12532 effect(KILL cr);
12533
12534 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12535 ins_encode %{
12536 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12537 %}
12538 ins_pipe(ialu_reg);
12539 %}
12540
12541 // Logical Shift Right by 8-bit immediate
12542 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12543 %{
12544 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12545 effect(KILL cr);
12546
12547 format %{ "shrq $dst, $shift" %}
12548 ins_encode %{
12549 __ shrq($dst$$Address, $shift$$constant);
12550 %}
12551 ins_pipe(ialu_mem_imm);
12552 %}
12553
12554 // Logical Shift Right by variable
12555 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12556 %{
12557 predicate(!VM_Version::supports_bmi2());
12558 match(Set dst (URShiftL dst shift));
12559 effect(KILL cr);
12560
12561 format %{ "shrq $dst, $shift" %}
12562 ins_encode %{
12563 __ shrq($dst$$Register);
12564 %}
12565 ins_pipe(ialu_reg_reg);
12566 %}
12567
12568 // Logical Shift Right by variable
12569 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12570 %{
12571 predicate(!VM_Version::supports_bmi2());
12572 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12573 effect(KILL cr);
12574
12575 format %{ "shrq $dst, $shift" %}
12576 ins_encode %{
12577 __ shrq($dst$$Address);
12578 %}
12579 ins_pipe(ialu_mem_reg);
12580 %}
12581
12582 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12583 %{
12584 predicate(VM_Version::supports_bmi2());
12585 match(Set dst (URShiftL src shift));
12586
12587 format %{ "shrxq $dst, $src, $shift" %}
12588 ins_encode %{
12589 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12590 %}
12591 ins_pipe(ialu_reg_reg);
12592 %}
12593
12594 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12595 %{
12596 predicate(VM_Version::supports_bmi2());
12597 match(Set dst (URShiftL (LoadL src) shift));
12598 ins_cost(175);
12599 format %{ "shrxq $dst, $src, $shift" %}
12600 ins_encode %{
12601 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12602 %}
12603 ins_pipe(ialu_reg_mem);
12604 %}
12605
// Shift Left by 24, followed by Arithmetic Shift Right by 24, i.e. a sign extension of the low 8 bits.
12607 // This idiom is used by the compiler for the i2b bytecode.
12608 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12609 %{
12610 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12611
12612 format %{ "movsbl $dst, $src\t# i2b" %}
12613 ins_encode %{
12614 __ movsbl($dst$$Register, $src$$Register);
12615 %}
12616 ins_pipe(ialu_reg_reg);
12617 %}
12618
// Shift Left by 16, followed by Arithmetic Shift Right by 16, i.e. a sign extension of the low 16 bits.
// This idiom is used by the compiler for the i2s bytecode.
12621 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12622 %{
12623 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12624
12625 format %{ "movswl $dst, $src\t# i2s" %}
12626 ins_encode %{
12627 __ movswl($dst$$Register, $src$$Register);
12628 %}
12629 ins_pipe(ialu_reg_reg);
12630 %}
12631
12632 // ROL/ROR instructions
12633
12634 // Rotate left by constant.
12635 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12636 %{
12637 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12638 match(Set dst (RotateLeft dst shift));
12639 effect(KILL cr);
12640 format %{ "roll $dst, $shift" %}
12641 ins_encode %{
12642 __ roll($dst$$Register, $shift$$constant);
12643 %}
12644 ins_pipe(ialu_reg);
12645 %}
12646
12647 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12648 %{
12649 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12650 match(Set dst (RotateLeft src shift));
12651 format %{ "rolxl $dst, $src, $shift" %}
12652 ins_encode %{
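    // BMI2 has no rolx form; a rotate left by s is emitted as rorxl by (32 - s).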
12653 int shift = 32 - ($shift$$constant & 31);
12654 __ rorxl($dst$$Register, $src$$Register, shift);
12655 %}
12656 ins_pipe(ialu_reg_reg);
12657 %}
12658
12659 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12660 %{
12661 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12662 match(Set dst (RotateLeft (LoadI src) shift));
12663 ins_cost(175);
12664 format %{ "rolxl $dst, $src, $shift" %}
12665 ins_encode %{
12666 int shift = 32 - ($shift$$constant & 31);
12667 __ rorxl($dst$$Register, $src$$Address, shift);
12668 %}
12669 ins_pipe(ialu_reg_mem);
12670 %}
12671
12672 // Rotate Left by variable
12673 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12674 %{
12675 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12676 match(Set dst (RotateLeft dst shift));
12677 effect(KILL cr);
12678 format %{ "roll $dst, $shift" %}
12679 ins_encode %{
12680 __ roll($dst$$Register);
12681 %}
12682 ins_pipe(ialu_reg_reg);
12683 %}
12684
12685 // Rotate Left by variable
12686 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12687 %{
12688 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12689 match(Set dst (RotateLeft src shift));
12690 effect(KILL cr);
12691 flag(PD::Flag_ndd_demotable_opr1);
12692
12693 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12694 ins_encode %{
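    // The rotate count is taken implicitly from CL (the rcx_RegI shift operand).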
12695 __ eroll($dst$$Register, $src$$Register, false);
12696 %}
12697 ins_pipe(ialu_reg_reg);
12698 %}
12699
12700 // Rotate Right by constant.
12701 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12702 %{
12703 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12704 match(Set dst (RotateRight dst shift));
12705 effect(KILL cr);
12706 format %{ "rorl $dst, $shift" %}
12707 ins_encode %{
12708 __ rorl($dst$$Register, $shift$$constant);
12709 %}
12710 ins_pipe(ialu_reg);
12711 %}
12712
12713 // Rotate Right by constant.
12714 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12715 %{
12716 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12717 match(Set dst (RotateRight src shift));
12718 format %{ "rorxl $dst, $src, $shift" %}
12719 ins_encode %{
12720 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12721 %}
12722 ins_pipe(ialu_reg_reg);
12723 %}
12724
12725 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12726 %{
12727 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12728 match(Set dst (RotateRight (LoadI src) shift));
12729 ins_cost(175);
12730 format %{ "rorxl $dst, $src, $shift" %}
12731 ins_encode %{
12732 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12733 %}
12734 ins_pipe(ialu_reg_mem);
12735 %}
12736
12737 // Rotate Right by variable
12738 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12739 %{
12740 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12741 match(Set dst (RotateRight dst shift));
12742 effect(KILL cr);
12743 format %{ "rorl $dst, $shift" %}
12744 ins_encode %{
12745 __ rorl($dst$$Register);
12746 %}
12747 ins_pipe(ialu_reg_reg);
12748 %}
12749
12750 // Rotate Right by variable
12751 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12752 %{
12753 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12754 match(Set dst (RotateRight src shift));
12755 effect(KILL cr);
12756 flag(PD::Flag_ndd_demotable_opr1);
12757
  format %{ "erorl $dst, $src, $shift\t# rotate right (int ndd)" %}
12759 ins_encode %{
12760 __ erorl($dst$$Register, $src$$Register, false);
12761 %}
12762 ins_pipe(ialu_reg_reg);
12763 %}
12764
12765 // Rotate Left by constant.
12766 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12767 %{
12768 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12769 match(Set dst (RotateLeft dst shift));
12770 effect(KILL cr);
12771 format %{ "rolq $dst, $shift" %}
12772 ins_encode %{
12773 __ rolq($dst$$Register, $shift$$constant);
12774 %}
12775 ins_pipe(ialu_reg);
12776 %}
12777
12778 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12779 %{
12780 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12781 match(Set dst (RotateLeft src shift));
12782 format %{ "rolxq $dst, $src, $shift" %}
12783 ins_encode %{
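    // A rotate left by s is emitted as rorxq by (64 - s).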
12784 int shift = 64 - ($shift$$constant & 63);
12785 __ rorxq($dst$$Register, $src$$Register, shift);
12786 %}
12787 ins_pipe(ialu_reg_reg);
12788 %}
12789
12790 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12791 %{
12792 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12793 match(Set dst (RotateLeft (LoadL src) shift));
12794 ins_cost(175);
12795 format %{ "rolxq $dst, $src, $shift" %}
12796 ins_encode %{
12797 int shift = 64 - ($shift$$constant & 63);
12798 __ rorxq($dst$$Register, $src$$Address, shift);
12799 %}
12800 ins_pipe(ialu_reg_mem);
12801 %}
12802
12803 // Rotate Left by variable
12804 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12805 %{
12806 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12807 match(Set dst (RotateLeft dst shift));
12808 effect(KILL cr);
12809
12810 format %{ "rolq $dst, $shift" %}
12811 ins_encode %{
12812 __ rolq($dst$$Register);
12813 %}
12814 ins_pipe(ialu_reg_reg);
12815 %}
12816
12817 // Rotate Left by variable
12818 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12819 %{
12820 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12821 match(Set dst (RotateLeft src shift));
12822 effect(KILL cr);
12823 flag(PD::Flag_ndd_demotable_opr1);
12824
  format %{ "erolq $dst, $src, $shift\t# rotate left (long ndd)" %}
12826 ins_encode %{
12827 __ erolq($dst$$Register, $src$$Register, false);
12828 %}
12829 ins_pipe(ialu_reg_reg);
12830 %}
12831
12832 // Rotate Right by constant.
12833 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12834 %{
12835 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12836 match(Set dst (RotateRight dst shift));
12837 effect(KILL cr);
12838 format %{ "rorq $dst, $shift" %}
12839 ins_encode %{
12840 __ rorq($dst$$Register, $shift$$constant);
12841 %}
12842 ins_pipe(ialu_reg);
12843 %}
12844
12845 // Rotate Right by constant
12846 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12847 %{
12848 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12849 match(Set dst (RotateRight src shift));
12850 format %{ "rorxq $dst, $src, $shift" %}
12851 ins_encode %{
12852 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12853 %}
12854 ins_pipe(ialu_reg_reg);
12855 %}
12856
12857 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12858 %{
12859 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12860 match(Set dst (RotateRight (LoadL src) shift));
12861 ins_cost(175);
12862 format %{ "rorxq $dst, $src, $shift" %}
12863 ins_encode %{
12864 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12865 %}
12866 ins_pipe(ialu_reg_mem);
12867 %}
12868
12869 // Rotate Right by variable
12870 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12871 %{
12872 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12873 match(Set dst (RotateRight dst shift));
12874 effect(KILL cr);
12875 format %{ "rorq $dst, $shift" %}
12876 ins_encode %{
12877 __ rorq($dst$$Register);
12878 %}
12879 ins_pipe(ialu_reg_reg);
12880 %}
12881
12882 // Rotate Right by variable
12883 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12884 %{
12885 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12886 match(Set dst (RotateRight src shift));
12887 effect(KILL cr);
12888 flag(PD::Flag_ndd_demotable_opr1);
12889
  format %{ "erorq $dst, $src, $shift\t# rotate right (long ndd)" %}
12891 ins_encode %{
12892 __ erorq($dst$$Register, $src$$Register, false);
12893 %}
12894 ins_pipe(ialu_reg_reg);
12895 %}
12896
12897 //----------------------------- CompressBits/ExpandBits ------------------------
12898
12899 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12900 predicate(n->bottom_type()->isa_long());
12901 match(Set dst (CompressBits src mask));
12902 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12903 ins_encode %{
12904 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12905 %}
12906 ins_pipe( pipe_slow );
12907 %}
12908
12909 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12910 predicate(n->bottom_type()->isa_long());
12911 match(Set dst (ExpandBits src mask));
12912 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12913 ins_encode %{
12914 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12915 %}
12916 ins_pipe( pipe_slow );
12917 %}
12918
12919 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12920 predicate(n->bottom_type()->isa_long());
12921 match(Set dst (CompressBits src (LoadL mask)));
12922 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12923 ins_encode %{
12924 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12925 %}
12926 ins_pipe( pipe_slow );
12927 %}
12928
12929 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12930 predicate(n->bottom_type()->isa_long());
12931 match(Set dst (ExpandBits src (LoadL mask)));
12932 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12933 ins_encode %{
12934 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12935 %}
12936 ins_pipe( pipe_slow );
12937 %}
12938
12939
12940 // Logical Instructions
12941
12942 // Integer Logical Instructions
12943
12944 // And Instructions
12945 // And Register with Register
12946 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12947 %{
12948 predicate(!UseAPX);
12949 match(Set dst (AndI dst src));
12950 effect(KILL cr);
12951 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12952
12953 format %{ "andl $dst, $src\t# int" %}
12954 ins_encode %{
12955 __ andl($dst$$Register, $src$$Register);
12956 %}
12957 ins_pipe(ialu_reg_reg);
12958 %}
12959
12960 // And Register with Register using New Data Destination (NDD)
12961 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12962 %{
12963 predicate(UseAPX);
12964 match(Set dst (AndI src1 src2));
12965 effect(KILL cr);
12966 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12967
12968 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12969 ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
12972 %}
12973 ins_pipe(ialu_reg_reg);
12974 %}
12975
12976 // And Register with Immediate 255
12977 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12978 %{
12979 match(Set dst (AndI src mask));
12980
12981 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
12982 ins_encode %{
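    // AndI with 0xFF reduces to a zero-extending byte move; movzbl does not modify the flags, so no KILL cr is needed.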
12983 __ movzbl($dst$$Register, $src$$Register);
12984 %}
12985 ins_pipe(ialu_reg);
12986 %}
12987
12988 // And Register with Immediate 255 and promote to long
12989 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12990 %{
12991 match(Set dst (ConvI2L (AndI src mask)));
12992
12993 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
12994 ins_encode %{
12995 __ movzbl($dst$$Register, $src$$Register);
12996 %}
12997 ins_pipe(ialu_reg);
12998 %}
12999
13000 // And Register with Immediate 65535
13001 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13002 %{
13003 match(Set dst (AndI src mask));
13004
13005 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
13006 ins_encode %{
13007 __ movzwl($dst$$Register, $src$$Register);
13008 %}
13009 ins_pipe(ialu_reg);
13010 %}
13011
13012 // And Register with Immediate 65535 and promote to long
13013 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13014 %{
13015 match(Set dst (ConvI2L (AndI src mask)));
13016
13017 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
13018 ins_encode %{
13019 __ movzwl($dst$$Register, $src$$Register);
13020 %}
13021 ins_pipe(ialu_reg);
13022 %}
13023
13024 // Can skip int2long conversions after AND with small bitmask
13025 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13026 %{
13027 predicate(VM_Version::supports_bmi2());
13028 ins_cost(125);
13029 effect(TEMP tmp, KILL cr);
13030 match(Set dst (ConvI2L (AndI src mask)));
13031 format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13032 ins_encode %{
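    // mask is 2^k - 1; load k = log2(mask + 1) into tmp, then bzhiq keeps the low k bits and
    // clears bits k..63, which also covers the int-to-long zero extension.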
13033 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13034 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13035 %}
13036 ins_pipe(ialu_reg_reg);
13037 %}
13038
13039 // And Register with Immediate
13040 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13041 %{
13042 predicate(!UseAPX);
13043 match(Set dst (AndI dst src));
13044 effect(KILL cr);
13045 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13046
13047 format %{ "andl $dst, $src\t# int" %}
13048 ins_encode %{
13049 __ andl($dst$$Register, $src$$constant);
13050 %}
13051 ins_pipe(ialu_reg);
13052 %}
13053
13054 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13055 %{
13056 predicate(UseAPX);
13057 match(Set dst (AndI src1 src2));
13058 effect(KILL cr);
13059 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13060
13061 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13062 ins_encode %{
13063 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13064 %}
13065 ins_pipe(ialu_reg);
13066 %}
13067
13068 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13069 %{
13070 predicate(UseAPX);
13071 match(Set dst (AndI (LoadI src1) src2));
13072 effect(KILL cr);
13073 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13074
13075 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13076 ins_encode %{
13077 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13078 %}
13079 ins_pipe(ialu_reg);
13080 %}
13081
13082 // And Register with Memory
13083 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13084 %{
13085 predicate(!UseAPX);
13086 match(Set dst (AndI dst (LoadI src)));
13087 effect(KILL cr);
13088 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13089
13090 ins_cost(150);
13091 format %{ "andl $dst, $src\t# int" %}
13092 ins_encode %{
13093 __ andl($dst$$Register, $src$$Address);
13094 %}
13095 ins_pipe(ialu_reg_mem);
13096 %}
13097
13098 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13099 %{
13100 predicate(UseAPX);
13101 match(Set dst (AndI src1 (LoadI src2)));
13102 effect(KILL cr);
13103 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13104
13105 ins_cost(150);
13106 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13107 ins_encode %{
13108 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13109 %}
13110 ins_pipe(ialu_reg_mem);
13111 %}
13112
13113 // And Memory with Register
13114 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13115 %{
13116 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13117 effect(KILL cr);
13118 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13119
13120 ins_cost(150);
13121 format %{ "andb $dst, $src\t# byte" %}
13122 ins_encode %{
13123 __ andb($dst$$Address, $src$$Register);
13124 %}
13125 ins_pipe(ialu_mem_reg);
13126 %}
13127
13128 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13129 %{
13130 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13131 effect(KILL cr);
13132 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13133
13134 ins_cost(150);
13135 format %{ "andl $dst, $src\t# int" %}
13136 ins_encode %{
13137 __ andl($dst$$Address, $src$$Register);
13138 %}
13139 ins_pipe(ialu_mem_reg);
13140 %}
13141
13142 // And Memory with Immediate
13143 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13144 %{
13145 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13146 effect(KILL cr);
13147 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13148
13149 ins_cost(125);
13150 format %{ "andl $dst, $src\t# int" %}
13151 ins_encode %{
13152 __ andl($dst$$Address, $src$$constant);
13153 %}
13154 ins_pipe(ialu_mem_imm);
13155 %}
13156
13157 // BMI1 instructions
13158 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13159 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13160 predicate(UseBMI1Instructions);
13161 effect(KILL cr);
13162 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13163
13164 ins_cost(125);
13165 format %{ "andnl $dst, $src1, $src2" %}
13166
13167 ins_encode %{
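    // andn computes (~src1) & src2, matching the (src1 ^ -1) & src2 pattern directly.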
13168 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13169 %}
13170 ins_pipe(ialu_reg_mem);
13171 %}
13172
13173 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13174 match(Set dst (AndI (XorI src1 minus_1) src2));
13175 predicate(UseBMI1Instructions);
13176 effect(KILL cr);
13177 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13178
13179 format %{ "andnl $dst, $src1, $src2" %}
13180
13181 ins_encode %{
13182 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13183 %}
13184 ins_pipe(ialu_reg);
13185 %}
13186
13187 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13188 match(Set dst (AndI (SubI imm_zero src) src));
13189 predicate(UseBMI1Instructions);
13190 effect(KILL cr);
13191 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13192
13193 format %{ "blsil $dst, $src" %}
13194
13195 ins_encode %{
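    // blsi isolates the lowest set bit: dst = src & (-src).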
13196 __ blsil($dst$$Register, $src$$Register);
13197 %}
13198 ins_pipe(ialu_reg);
13199 %}
13200
13201 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13202 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13203 predicate(UseBMI1Instructions);
13204 effect(KILL cr);
13205 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13206
13207 ins_cost(125);
13208 format %{ "blsil $dst, $src" %}
13209
13210 ins_encode %{
13211 __ blsil($dst$$Register, $src$$Address);
13212 %}
13213 ins_pipe(ialu_reg_mem);
13214 %}
13215
13216 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13217 %{
13218 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13219 predicate(UseBMI1Instructions);
13220 effect(KILL cr);
13221 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13222
13223 ins_cost(125);
13224 format %{ "blsmskl $dst, $src" %}
13225
13226 ins_encode %{
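    // blsmsk builds a mask up to and including the lowest set bit: dst = src ^ (src - 1).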
13227 __ blsmskl($dst$$Register, $src$$Address);
13228 %}
13229 ins_pipe(ialu_reg_mem);
13230 %}
13231
13232 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13233 %{
13234 match(Set dst (XorI (AddI src minus_1) src));
13235 predicate(UseBMI1Instructions);
13236 effect(KILL cr);
13237 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13238
13239 format %{ "blsmskl $dst, $src" %}
13240
13241 ins_encode %{
13242 __ blsmskl($dst$$Register, $src$$Register);
13243 %}
13244
13245 ins_pipe(ialu_reg);
13246 %}
13247
13248 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13249 %{
13250 match(Set dst (AndI (AddI src minus_1) src) );
13251 predicate(UseBMI1Instructions);
13252 effect(KILL cr);
13253 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13254
13255 format %{ "blsrl $dst, $src" %}
13256
13257 ins_encode %{
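    // blsr clears the lowest set bit: dst = src & (src - 1).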
13258 __ blsrl($dst$$Register, $src$$Register);
13259 %}
13260
  ins_pipe(ialu_reg);
13262 %}
13263
13264 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13265 %{
13266 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13267 predicate(UseBMI1Instructions);
13268 effect(KILL cr);
13269 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13270
13271 ins_cost(125);
13272 format %{ "blsrl $dst, $src" %}
13273
13274 ins_encode %{
13275 __ blsrl($dst$$Register, $src$$Address);
13276 %}
13277
  ins_pipe(ialu_reg_mem);
13279 %}
13280
13281 // Or Instructions
13282 // Or Register with Register
13283 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13284 %{
13285 predicate(!UseAPX);
13286 match(Set dst (OrI dst src));
13287 effect(KILL cr);
13288 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13289
13290 format %{ "orl $dst, $src\t# int" %}
13291 ins_encode %{
13292 __ orl($dst$$Register, $src$$Register);
13293 %}
13294 ins_pipe(ialu_reg_reg);
13295 %}
13296
13297 // Or Register with Register using New Data Destination (NDD)
13298 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13299 %{
13300 predicate(UseAPX);
13301 match(Set dst (OrI src1 src2));
13302 effect(KILL cr);
13303 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13304
13305 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13306 ins_encode %{
13307 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13308 %}
13309 ins_pipe(ialu_reg_reg);
13310 %}
13311
13312 // Or Register with Immediate
13313 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13314 %{
13315 predicate(!UseAPX);
13316 match(Set dst (OrI dst src));
13317 effect(KILL cr);
13318 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13319
13320 format %{ "orl $dst, $src\t# int" %}
13321 ins_encode %{
13322 __ orl($dst$$Register, $src$$constant);
13323 %}
13324 ins_pipe(ialu_reg);
13325 %}
13326
13327 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13328 %{
13329 predicate(UseAPX);
13330 match(Set dst (OrI src1 src2));
13331 effect(KILL cr);
13332 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13333
13334 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13335 ins_encode %{
13336 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13337 %}
13338 ins_pipe(ialu_reg);
13339 %}
13340
13341 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13342 %{
13343 predicate(UseAPX);
13344 match(Set dst (OrI src1 src2));
13345 effect(KILL cr);
13346 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13347
13348 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13349 ins_encode %{
13350 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13351 %}
13352 ins_pipe(ialu_reg);
13353 %}
13354
13355 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13356 %{
13357 predicate(UseAPX);
13358 match(Set dst (OrI (LoadI src1) src2));
13359 effect(KILL cr);
13360 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13361
13362 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13363 ins_encode %{
13364 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13365 %}
13366 ins_pipe(ialu_reg);
13367 %}
13368
13369 // Or Register with Memory
13370 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13371 %{
13372 predicate(!UseAPX);
13373 match(Set dst (OrI dst (LoadI src)));
13374 effect(KILL cr);
13375 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13376
13377 ins_cost(150);
13378 format %{ "orl $dst, $src\t# int" %}
13379 ins_encode %{
13380 __ orl($dst$$Register, $src$$Address);
13381 %}
13382 ins_pipe(ialu_reg_mem);
13383 %}
13384
13385 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13386 %{
13387 predicate(UseAPX);
13388 match(Set dst (OrI src1 (LoadI src2)));
13389 effect(KILL cr);
13390 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13391
13392 ins_cost(150);
13393 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13394 ins_encode %{
13395 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13396 %}
13397 ins_pipe(ialu_reg_mem);
13398 %}
13399
13400 // Or Memory with Register
13401 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13402 %{
13403 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13404 effect(KILL cr);
13405 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13406
13407 ins_cost(150);
13408 format %{ "orb $dst, $src\t# byte" %}
13409 ins_encode %{
13410 __ orb($dst$$Address, $src$$Register);
13411 %}
13412 ins_pipe(ialu_mem_reg);
13413 %}
13414
13415 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13416 %{
13417 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13418 effect(KILL cr);
13419 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13420
13421 ins_cost(150);
13422 format %{ "orl $dst, $src\t# int" %}
13423 ins_encode %{
13424 __ orl($dst$$Address, $src$$Register);
13425 %}
13426 ins_pipe(ialu_mem_reg);
13427 %}
13428
13429 // Or Memory with Immediate
13430 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13431 %{
13432 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13433 effect(KILL cr);
13434 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13435
13436 ins_cost(125);
13437 format %{ "orl $dst, $src\t# int" %}
13438 ins_encode %{
13439 __ orl($dst$$Address, $src$$constant);
13440 %}
13441 ins_pipe(ialu_mem_imm);
13442 %}
13443
13444 // Xor Instructions
13445 // Xor Register with Register
13446 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13447 %{
13448 predicate(!UseAPX);
13449 match(Set dst (XorI dst src));
13450 effect(KILL cr);
13451 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13452
13453 format %{ "xorl $dst, $src\t# int" %}
13454 ins_encode %{
13455 __ xorl($dst$$Register, $src$$Register);
13456 %}
13457 ins_pipe(ialu_reg_reg);
13458 %}
13459
13460 // Xor Register with Register using New Data Destination (NDD)
13461 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13462 %{
13463 predicate(UseAPX);
13464 match(Set dst (XorI src1 src2));
13465 effect(KILL cr);
13466 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13467
13468 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13469 ins_encode %{
13470 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13471 %}
13472 ins_pipe(ialu_reg_reg);
13473 %}
13474
13475 // Xor Register with Immediate -1
13476 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13477 %{
13478 predicate(!UseAPX);
13479 match(Set dst (XorI dst imm));
13480
13481 format %{ "notl $dst" %}
13482 ins_encode %{
13483 __ notl($dst$$Register);
13484 %}
13485 ins_pipe(ialu_reg);
13486 %}
13487
13488 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13489 %{
13490 match(Set dst (XorI src imm));
13491 predicate(UseAPX);
13492 flag(PD::Flag_ndd_demotable_opr1);
13493
13494 format %{ "enotl $dst, $src" %}
13495 ins_encode %{
13496 __ enotl($dst$$Register, $src$$Register);
13497 %}
13498 ins_pipe(ialu_reg);
13499 %}
13500
13501 // Xor Register with Immediate
13502 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13503 %{
  // The -1 case is excluded here so that xorI_rReg_im1 is always selected for that constant, regardless of cost.
13505 predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13506 match(Set dst (XorI dst src));
13507 effect(KILL cr);
13508 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13509
13510 format %{ "xorl $dst, $src\t# int" %}
13511 ins_encode %{
13512 __ xorl($dst$$Register, $src$$constant);
13513 %}
13514 ins_pipe(ialu_reg);
13515 %}
13516
13517 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13518 %{
  // The -1 case is excluded here so that xorI_rReg_im1_ndd is always selected for that constant, regardless of cost.
13520 predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13521 match(Set dst (XorI src1 src2));
13522 effect(KILL cr);
13523 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13524
13525 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13526 ins_encode %{
13527 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13528 %}
13529 ins_pipe(ialu_reg);
13530 %}
13531
13532 // Xor Memory with Immediate
13533 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13534 %{
13535 predicate(UseAPX);
13536 match(Set dst (XorI (LoadI src1) src2));
13537 effect(KILL cr);
13538 ins_cost(150);
13539 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13540
13541 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13542 ins_encode %{
13543 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13544 %}
13545 ins_pipe(ialu_reg);
13546 %}
13547
13548 // Xor Register with Memory
13549 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13550 %{
13551 predicate(!UseAPX);
13552 match(Set dst (XorI dst (LoadI src)));
13553 effect(KILL cr);
13554 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13555
13556 ins_cost(150);
13557 format %{ "xorl $dst, $src\t# int" %}
13558 ins_encode %{
13559 __ xorl($dst$$Register, $src$$Address);
13560 %}
13561 ins_pipe(ialu_reg_mem);
13562 %}
13563
13564 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13565 %{
13566 predicate(UseAPX);
13567 match(Set dst (XorI src1 (LoadI src2)));
13568 effect(KILL cr);
13569 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13570
13571 ins_cost(150);
13572 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13573 ins_encode %{
13574 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13575 %}
13576 ins_pipe(ialu_reg_mem);
13577 %}
13578
13579 // Xor Memory with Register
13580 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13581 %{
13582 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13583 effect(KILL cr);
13584 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13585
13586 ins_cost(150);
13587 format %{ "xorb $dst, $src\t# byte" %}
13588 ins_encode %{
13589 __ xorb($dst$$Address, $src$$Register);
13590 %}
13591 ins_pipe(ialu_mem_reg);
13592 %}
13593
13594 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13595 %{
13596 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13597 effect(KILL cr);
13598 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13599
13600 ins_cost(150);
13601 format %{ "xorl $dst, $src\t# int" %}
13602 ins_encode %{
13603 __ xorl($dst$$Address, $src$$Register);
13604 %}
13605 ins_pipe(ialu_mem_reg);
13606 %}
13607
13608 // Xor Memory with Immediate
13609 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13610 %{
13611 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13612 effect(KILL cr);
13613 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13614
13615 ins_cost(125);
13616 format %{ "xorl $dst, $src\t# int" %}
13617 ins_encode %{
13618 __ xorl($dst$$Address, $src$$constant);
13619 %}
13620 ins_pipe(ialu_mem_imm);
13621 %}
13622
13623
13624 // Long Logical Instructions
13625
13626 // And Instructions
13627 // And Register with Register
13628 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13629 %{
13630 predicate(!UseAPX);
13631 match(Set dst (AndL dst src));
13632 effect(KILL cr);
13633 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13634
13635 format %{ "andq $dst, $src\t# long" %}
13636 ins_encode %{
13637 __ andq($dst$$Register, $src$$Register);
13638 %}
13639 ins_pipe(ialu_reg_reg);
13640 %}
13641
13642 // And Register with Register using New Data Destination (NDD)
13643 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13644 %{
13645 predicate(UseAPX);
13646 match(Set dst (AndL src1 src2));
13647 effect(KILL cr);
13648 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13649
13650 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13651 ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
13654 %}
13655 ins_pipe(ialu_reg_reg);
13656 %}
13657
13658 // And Register with Immediate 255
13659 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13660 %{
13661 match(Set dst (AndL src mask));
13662
13663 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13664 ins_encode %{
    // movzbl zeroes out the upper 32 bits and does not need REX.W
13666 __ movzbl($dst$$Register, $src$$Register);
13667 %}
13668 ins_pipe(ialu_reg);
13669 %}
13670
13671 // And Register with Immediate 65535
13672 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13673 %{
13674 match(Set dst (AndL src mask));
13675
13676 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13677 ins_encode %{
    // movzwl zeroes out the upper 32 bits and does not need REX.W
13679 __ movzwl($dst$$Register, $src$$Register);
13680 %}
13681 ins_pipe(ialu_reg);
13682 %}
13683
13684 // And Register with Immediate
13685 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13686 %{
13687 predicate(!UseAPX);
13688 match(Set dst (AndL dst src));
13689 effect(KILL cr);
13690 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13691
13692 format %{ "andq $dst, $src\t# long" %}
13693 ins_encode %{
13694 __ andq($dst$$Register, $src$$constant);
13695 %}
13696 ins_pipe(ialu_reg);
13697 %}
13698
13699 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13700 %{
13701 predicate(UseAPX);
13702 match(Set dst (AndL src1 src2));
13703 effect(KILL cr);
13704 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13705
13706 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13707 ins_encode %{
13708 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13709 %}
13710 ins_pipe(ialu_reg);
13711 %}
13712
13713 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13714 %{
13715 predicate(UseAPX);
13716 match(Set dst (AndL (LoadL src1) src2));
13717 effect(KILL cr);
13718 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13719
13720 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13721 ins_encode %{
13722 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13723 %}
13724 ins_pipe(ialu_reg);
13725 %}
13726
13727 // And Register with Memory
13728 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13729 %{
13730 predicate(!UseAPX);
13731 match(Set dst (AndL dst (LoadL src)));
13732 effect(KILL cr);
13733 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13734
13735 ins_cost(150);
13736 format %{ "andq $dst, $src\t# long" %}
13737 ins_encode %{
13738 __ andq($dst$$Register, $src$$Address);
13739 %}
13740 ins_pipe(ialu_reg_mem);
13741 %}
13742
13743 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13744 %{
13745 predicate(UseAPX);
13746 match(Set dst (AndL src1 (LoadL src2)));
13747 effect(KILL cr);
13748 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13749
13750 ins_cost(150);
13751 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13752 ins_encode %{
13753 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13754 %}
13755 ins_pipe(ialu_reg_mem);
13756 %}
13757
13758 // And Memory with Register
13759 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13760 %{
13761 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13762 effect(KILL cr);
13763 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13764
13765 ins_cost(150);
13766 format %{ "andq $dst, $src\t# long" %}
13767 ins_encode %{
13768 __ andq($dst$$Address, $src$$Register);
13769 %}
13770 ins_pipe(ialu_mem_reg);
13771 %}
13772
13773 // And Memory with Immediate
13774 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13775 %{
13776 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13777 effect(KILL cr);
13778 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13779
13780 ins_cost(125);
13781 format %{ "andq $dst, $src\t# long" %}
13782 ins_encode %{
13783 __ andq($dst$$Address, $src$$constant);
13784 %}
13785 ins_pipe(ialu_mem_imm);
13786 %}
13787
13788 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13789 %{
13790 // con should be a pure 64-bit immediate given that not(con) is a power of 2
13791 // because AND/OR works well enough for 8/32-bit values.
13792 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13793
13794 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13795 effect(KILL cr);
13796
13797 ins_cost(125);
13798 format %{ "btrq $dst, log2(not($con))\t# long" %}
13799 ins_encode %{
13800 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13801 %}
13802 ins_pipe(ialu_mem_imm);
13803 %}
13804
13805 // BMI1 instructions
13806 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13807 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13808 predicate(UseBMI1Instructions);
13809 effect(KILL cr);
13810 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13811
13812 ins_cost(125);
13813 format %{ "andnq $dst, $src1, $src2" %}
13814
13815 ins_encode %{
13816 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13817 %}
13818 ins_pipe(ialu_reg_mem);
13819 %}
13820
13821 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13822 match(Set dst (AndL (XorL src1 minus_1) src2));
13823 predicate(UseBMI1Instructions);
13824 effect(KILL cr);
13825 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13826
13827 format %{ "andnq $dst, $src1, $src2" %}
13828
13829 ins_encode %{
13830 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13831 %}
13832 ins_pipe(ialu_reg_mem);
13833 %}
13834
13835 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13836 match(Set dst (AndL (SubL imm_zero src) src));
13837 predicate(UseBMI1Instructions);
13838 effect(KILL cr);
13839 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13840
13841 format %{ "blsiq $dst, $src" %}
13842
13843 ins_encode %{
13844 __ blsiq($dst$$Register, $src$$Register);
13845 %}
13846 ins_pipe(ialu_reg);
13847 %}
13848
13849 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13850 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13851 predicate(UseBMI1Instructions);
13852 effect(KILL cr);
13853 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13854
13855 ins_cost(125);
13856 format %{ "blsiq $dst, $src" %}
13857
13858 ins_encode %{
13859 __ blsiq($dst$$Register, $src$$Address);
13860 %}
13861 ins_pipe(ialu_reg_mem);
13862 %}
13863
13864 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13865 %{
13866 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13867 predicate(UseBMI1Instructions);
13868 effect(KILL cr);
13869 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13870
13871 ins_cost(125);
13872 format %{ "blsmskq $dst, $src" %}
13873
13874 ins_encode %{
13875 __ blsmskq($dst$$Register, $src$$Address);
13876 %}
13877 ins_pipe(ialu_reg_mem);
13878 %}
13879
13880 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13881 %{
13882 match(Set dst (XorL (AddL src minus_1) src));
13883 predicate(UseBMI1Instructions);
13884 effect(KILL cr);
13885 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13886
13887 format %{ "blsmskq $dst, $src" %}
13888
13889 ins_encode %{
13890 __ blsmskq($dst$$Register, $src$$Register);
13891 %}
13892
13893 ins_pipe(ialu_reg);
13894 %}
13895
13896 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13897 %{
13898 match(Set dst (AndL (AddL src minus_1) src) );
13899 predicate(UseBMI1Instructions);
13900 effect(KILL cr);
13901 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13902
13903 format %{ "blsrq $dst, $src" %}
13904
13905 ins_encode %{
13906 __ blsrq($dst$$Register, $src$$Register);
13907 %}
13908
13909 ins_pipe(ialu_reg);
13910 %}
13911
13912 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13913 %{
13914 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13915 predicate(UseBMI1Instructions);
13916 effect(KILL cr);
13917 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13918
13919 ins_cost(125);
13920 format %{ "blsrq $dst, $src" %}
13921
13922 ins_encode %{
13923 __ blsrq($dst$$Register, $src$$Address);
13924 %}
13925
13926 ins_pipe(ialu_reg);
13927 %}
13928
13929 // Or Instructions
13930 // Or Register with Register
13931 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13932 %{
13933 predicate(!UseAPX);
13934 match(Set dst (OrL dst src));
13935 effect(KILL cr);
13936 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13937
13938 format %{ "orq $dst, $src\t# long" %}
13939 ins_encode %{
13940 __ orq($dst$$Register, $src$$Register);
13941 %}
13942 ins_pipe(ialu_reg_reg);
13943 %}
13944
13945 // Or Register with Register using New Data Destination (NDD)
13946 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13947 %{
13948 predicate(UseAPX);
13949 match(Set dst (OrL src1 src2));
13950 effect(KILL cr);
13951 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13952
13953 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13954 ins_encode %{
13955 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13956
13957 %}
13958 ins_pipe(ialu_reg_reg);
13959 %}
13960
13961 // Use any_RegP to match R15 (TLS register) without spilling.
13962 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (OrL dst (CastP2X src)));
13964 effect(KILL cr);
13965 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13966
13967 format %{ "orq $dst, $src\t# long" %}
13968 ins_encode %{
13969 __ orq($dst$$Register, $src$$Register);
13970 %}
13971 ins_pipe(ialu_reg_reg);
13972 %}
13973
13974 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
13976 effect(KILL cr);
13977 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13978
13979 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13980 ins_encode %{
13981 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13982 %}
13983 ins_pipe(ialu_reg_reg);
13984 %}
13985
13986 // Or Register with Immediate
13987 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13988 %{
13989 predicate(!UseAPX);
13990 match(Set dst (OrL dst src));
13991 effect(KILL cr);
13992 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13993
13994 format %{ "orq $dst, $src\t# long" %}
13995 ins_encode %{
13996 __ orq($dst$$Register, $src$$constant);
13997 %}
13998 ins_pipe(ialu_reg);
13999 %}
14000
14001 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14002 %{
14003 predicate(UseAPX);
14004 match(Set dst (OrL src1 src2));
14005 effect(KILL cr);
14006 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14007
14008 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14009 ins_encode %{
14010 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14011 %}
14012 ins_pipe(ialu_reg);
14013 %}
14014
14015 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14016 %{
14017 predicate(UseAPX);
14018 match(Set dst (OrL src1 src2));
14019 effect(KILL cr);
14020 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14021
14022 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
14023 ins_encode %{
14024 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14025 %}
14026 ins_pipe(ialu_reg);
14027 %}
14028
// Or Memory with Immediate into Register (NDD)
14030 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14031 %{
14032 predicate(UseAPX);
14033 match(Set dst (OrL (LoadL src1) src2));
14034 effect(KILL cr);
14035 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14036
14037 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14038 ins_encode %{
14039 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14040 %}
14041 ins_pipe(ialu_reg);
14042 %}
14043
14044 // Or Register with Memory
14045 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14046 %{
14047 predicate(!UseAPX);
14048 match(Set dst (OrL dst (LoadL src)));
14049 effect(KILL cr);
14050 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14051
14052 ins_cost(150);
14053 format %{ "orq $dst, $src\t# long" %}
14054 ins_encode %{
14055 __ orq($dst$$Register, $src$$Address);
14056 %}
14057 ins_pipe(ialu_reg_mem);
14058 %}
14059
14060 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14061 %{
14062 predicate(UseAPX);
14063 match(Set dst (OrL src1 (LoadL src2)));
14064 effect(KILL cr);
14065 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14066
14067 ins_cost(150);
14068 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14069 ins_encode %{
14070 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14071 %}
14072 ins_pipe(ialu_reg_mem);
14073 %}
14074
14075 // Or Memory with Register
14076 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14077 %{
14078 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14079 effect(KILL cr);
14080 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14081
14082 ins_cost(150);
14083 format %{ "orq $dst, $src\t# long" %}
14084 ins_encode %{
14085 __ orq($dst$$Address, $src$$Register);
14086 %}
14087 ins_pipe(ialu_mem_reg);
14088 %}
14089
14090 // Or Memory with Immediate
14091 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14092 %{
14093 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14094 effect(KILL cr);
14095 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14096
14097 ins_cost(125);
14098 format %{ "orq $dst, $src\t# long" %}
14099 ins_encode %{
14100 __ orq($dst$$Address, $src$$constant);
14101 %}
14102 ins_pipe(ialu_mem_imm);
14103 %}
14104
14105 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14106 %{
14107 // con should be a pure 64-bit power of 2 immediate
14108 // because AND/OR works well enough for 8/32-bit values.
14109 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14110
14111 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14112 effect(KILL cr);
14113
14114 ins_cost(125);
14115 format %{ "btsq $dst, log2($con)\t# long" %}
14116 ins_encode %{
14117 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14118 %}
14119 ins_pipe(ialu_mem_imm);
14120 %}
14121
14122 // Xor Instructions
14123 // Xor Register with Register
14124 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14125 %{
14126 predicate(!UseAPX);
14127 match(Set dst (XorL dst src));
14128 effect(KILL cr);
14129 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14130
14131 format %{ "xorq $dst, $src\t# long" %}
14132 ins_encode %{
14133 __ xorq($dst$$Register, $src$$Register);
14134 %}
14135 ins_pipe(ialu_reg_reg);
14136 %}
14137
14138 // Xor Register with Register using New Data Destination (NDD)
14139 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14140 %{
14141 predicate(UseAPX);
14142 match(Set dst (XorL src1 src2));
14143 effect(KILL cr);
14144 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14145
14146 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14147 ins_encode %{
14148 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14149 %}
14150 ins_pipe(ialu_reg_reg);
14151 %}
14152
14153 // Xor Register with Immediate -1
14154 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14155 %{
14156 predicate(!UseAPX);
14157 match(Set dst (XorL dst imm));
14158
14159 format %{ "notq $dst" %}
14160 ins_encode %{
14161 __ notq($dst$$Register);
14162 %}
14163 ins_pipe(ialu_reg);
14164 %}
14165
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14167 %{
14168 predicate(UseAPX);
14169 match(Set dst (XorL src imm));
14170 flag(PD::Flag_ndd_demotable_opr1);
14171
14172 format %{ "enotq $dst, $src" %}
14173 ins_encode %{
14174 __ enotq($dst$$Register, $src$$Register);
14175 %}
14176 ins_pipe(ialu_reg);
14177 %}
14178
14179 // Xor Register with Immediate
14180 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14181 %{
  // The strict predicate excludes -1 so that xorL_rReg_im1 is selected for an immL32 of -1 regardless of cost.
14183 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14184 match(Set dst (XorL dst src));
14185 effect(KILL cr);
14186 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14187
14188 format %{ "xorq $dst, $src\t# long" %}
14189 ins_encode %{
14190 __ xorq($dst$$Register, $src$$constant);
14191 %}
14192 ins_pipe(ialu_reg);
14193 %}
14194
14195 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14196 %{
  // The strict predicate excludes -1 so that xorL_rReg_im1_ndd is selected for an immL32 of -1 regardless of cost.
14198 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14199 match(Set dst (XorL src1 src2));
14200 effect(KILL cr);
14201 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14202
14203 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14204 ins_encode %{
14205 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14206 %}
14207 ins_pipe(ialu_reg);
14208 %}
14209
// Xor Memory with Immediate into Register (NDD)
14211 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14212 %{
14213 predicate(UseAPX);
14214 match(Set dst (XorL (LoadL src1) src2));
14215 effect(KILL cr);
14216 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14217 ins_cost(150);
14218
14219 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14220 ins_encode %{
14221 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14222 %}
14223 ins_pipe(ialu_reg);
14224 %}
14225
14226 // Xor Register with Memory
14227 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14228 %{
14229 predicate(!UseAPX);
14230 match(Set dst (XorL dst (LoadL src)));
14231 effect(KILL cr);
14232 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14233
14234 ins_cost(150);
14235 format %{ "xorq $dst, $src\t# long" %}
14236 ins_encode %{
14237 __ xorq($dst$$Register, $src$$Address);
14238 %}
14239 ins_pipe(ialu_reg_mem);
14240 %}
14241
14242 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14243 %{
14244 predicate(UseAPX);
14245 match(Set dst (XorL src1 (LoadL src2)));
14246 effect(KILL cr);
14247 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14248
14249 ins_cost(150);
14250 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14251 ins_encode %{
14252 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14253 %}
14254 ins_pipe(ialu_reg_mem);
14255 %}
14256
14257 // Xor Memory with Register
14258 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14259 %{
14260 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14261 effect(KILL cr);
14262 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14263
14264 ins_cost(150);
14265 format %{ "xorq $dst, $src\t# long" %}
14266 ins_encode %{
14267 __ xorq($dst$$Address, $src$$Register);
14268 %}
14269 ins_pipe(ialu_mem_reg);
14270 %}
14271
14272 // Xor Memory with Immediate
14273 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14274 %{
14275 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14276 effect(KILL cr);
14277 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14278
14279 ins_cost(125);
14280 format %{ "xorq $dst, $src\t# long" %}
14281 ins_encode %{
14282 __ xorq($dst$$Address, $src$$constant);
14283 %}
14284 ins_pipe(ialu_mem_imm);
14285 %}
14286
14287 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14288 %{
14289 match(Set dst (CmpLTMask p q));
14290 effect(KILL cr);
14291
14292 ins_cost(400);
14293 format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
14294 "setcc $dst \t# emits setlt + movzbl or setzul for APX"
14295 "negl $dst" %}
14296 ins_encode %{
14297 __ cmpl($p$$Register, $q$$Register);
14298 __ setcc(Assembler::less, $dst$$Register);
14299 __ negl($dst$$Register);
14300 %}
14301 ins_pipe(pipe_slow);
14302 %}
14303
14304 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14305 %{
14306 match(Set dst (CmpLTMask dst zero));
14307 effect(KILL cr);
14308
14309 ins_cost(100);
14310 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14311 ins_encode %{
14312 __ sarl($dst$$Register, 31);
14313 %}
14314 ins_pipe(ialu_reg);
14315 %}
14316
14317 /* Better to save a register than avoid a branch */
14318 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14319 %{
14320 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14321 effect(KILL cr);
14322 ins_cost(300);
14323 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14324 "jge done\n\t"
14325 "addl $p,$y\n"
14326 "done: " %}
14327 ins_encode %{
14328 Register Rp = $p$$Register;
14329 Register Rq = $q$$Register;
14330 Register Ry = $y$$Register;
14331 Label done;
14332 __ subl(Rp, Rq);
14333 __ jccb(Assembler::greaterEqual, done);
14334 __ addl(Rp, Ry);
14335 __ bind(done);
14336 %}
14337 ins_pipe(pipe_cmplt);
14338 %}
14339
14340 /* Better to save a register than avoid a branch */
14341 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14342 %{
14343 match(Set y (AndI (CmpLTMask p q) y));
14344 effect(KILL cr);
14345
14346 ins_cost(300);
14347
14348 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
14349 "jlt done\n\t"
14350 "xorl $y, $y\n"
14351 "done: " %}
14352 ins_encode %{
14353 Register Rp = $p$$Register;
14354 Register Rq = $q$$Register;
14355 Register Ry = $y$$Register;
14356 Label done;
14357 __ cmpl(Rp, Rq);
14358 __ jccb(Assembler::less, done);
14359 __ xorl(Ry, Ry);
14360 __ bind(done);
14361 %}
14362 ins_pipe(pipe_cmplt);
14363 %}
14364
14365
14366 //---------- FP Instructions------------------------------------------------
14367
14368 // Really expensive, avoid
14369 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14370 %{
14371 match(Set cr (CmpF src1 src2));
14372
14373 ins_cost(500);
14374 format %{ "ucomiss $src1, $src2\n\t"
14375 "jnp,s exit\n\t"
14376 "pushfq\t# saw NaN, set CF\n\t"
14377 "andq [rsp], #0xffffff2b\n\t"
14378 "popfq\n"
14379 "exit:" %}
14380 ins_encode %{
14381 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14382 emit_cmpfp_fixup(masm);
14383 %}
14384 ins_pipe(pipe_slow);
14385 %}
14386
14387 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14388 match(Set cr (CmpF src1 src2));
14389
14390 ins_cost(100);
14391 format %{ "ucomiss $src1, $src2" %}
14392 ins_encode %{
14393 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14394 %}
14395 ins_pipe(pipe_slow);
14396 %}
14397
14398 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14399 match(Set cr (CmpF src1 src2));
14400
14401 ins_cost(100);
14402 format %{ "vucomxss $src1, $src2" %}
14403 ins_encode %{
14404 __ vucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14405 %}
14406 ins_pipe(pipe_slow);
14407 %}
14408
14409 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14410 match(Set cr (CmpF src1 (LoadF src2)));
14411
14412 ins_cost(100);
14413 format %{ "ucomiss $src1, $src2" %}
14414 ins_encode %{
14415 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14416 %}
14417 ins_pipe(pipe_slow);
14418 %}
14419
14420 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14421 match(Set cr (CmpF src1 (LoadF src2)));
14422
14423 ins_cost(100);
14424 format %{ "vucomxss $src1, $src2" %}
14425 ins_encode %{
14426 __ vucomxss($src1$$XMMRegister, $src2$$Address);
14427 %}
14428 ins_pipe(pipe_slow);
14429 %}
14430
14431 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14432 match(Set cr (CmpF src con));
14433
14434 ins_cost(100);
14435 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14436 ins_encode %{
14437 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14438 %}
14439 ins_pipe(pipe_slow);
14440 %}
14441
14442 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14443 match(Set cr (CmpF src con));
14444
14445 ins_cost(100);
14446 format %{ "vucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14447 ins_encode %{
14448 __ vucomxss($src$$XMMRegister, $constantaddress($con));
14449 %}
14450 ins_pipe(pipe_slow);
14451 %}
14452
14453 // Really expensive, avoid
14454 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14455 %{
14456 match(Set cr (CmpD src1 src2));
14457
14458 ins_cost(500);
14459 format %{ "ucomisd $src1, $src2\n\t"
14460 "jnp,s exit\n\t"
14461 "pushfq\t# saw NaN, set CF\n\t"
14462 "andq [rsp], #0xffffff2b\n\t"
14463 "popfq\n"
14464 "exit:" %}
14465 ins_encode %{
14466 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14467 emit_cmpfp_fixup(masm);
14468 %}
14469 ins_pipe(pipe_slow);
14470 %}
14471
14472 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14473 match(Set cr (CmpD src1 src2));
14474
14475 ins_cost(100);
14476 format %{ "ucomisd $src1, $src2 test" %}
14477 ins_encode %{
14478 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14479 %}
14480 ins_pipe(pipe_slow);
14481 %}
14482
14483 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14484 match(Set cr (CmpD src1 src2));
14485
14486 ins_cost(100);
14487 format %{ "vucomxsd $src1, $src2 test" %}
14488 ins_encode %{
14489 __ vucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14490 %}
14491 ins_pipe(pipe_slow);
14492 %}
14493
14494 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14495 match(Set cr (CmpD src1 (LoadD src2)));
14496
14497 ins_cost(100);
14498 format %{ "ucomisd $src1, $src2" %}
14499 ins_encode %{
14500 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14501 %}
14502 ins_pipe(pipe_slow);
14503 %}
14504
14505 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14506 match(Set cr (CmpD src1 (LoadD src2)));
14507
14508 ins_cost(100);
14509 format %{ "vucomxsd $src1, $src2" %}
14510 ins_encode %{
14511 __ vucomxsd($src1$$XMMRegister, $src2$$Address);
14512 %}
14513 ins_pipe(pipe_slow);
14514 %}
14515
14516 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14517 match(Set cr (CmpD src con));
14518 ins_cost(100);
14519 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14520 ins_encode %{
14521 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14522 %}
14523 ins_pipe(pipe_slow);
14524 %}
14525
14526 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14527 match(Set cr (CmpD src con));
14528
14529 ins_cost(100);
14530 format %{ "vucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14531 ins_encode %{
14532 __ vucomxsd($src$$XMMRegister, $constantaddress($con));
14533 %}
14534 ins_pipe(pipe_slow);
14535 %}
14536
14537 // Compare into -1,0,1
14538 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14539 %{
14540 match(Set dst (CmpF3 src1 src2));
14541 effect(KILL cr);
14542
14543 ins_cost(275);
14544 format %{ "ucomiss $src1, $src2\n\t"
14545 "movl $dst, #-1\n\t"
14546 "jp,s done\n\t"
14547 "jb,s done\n\t"
14548 "setne $dst\n\t"
14549 "movzbl $dst, $dst\n"
14550 "done:" %}
14551 ins_encode %{
14552 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14553 emit_cmpfp3(masm, $dst$$Register);
14554 %}
14555 ins_pipe(pipe_slow);
14556 %}
14557
14558 // Compare into -1,0,1
14559 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14560 %{
14561 match(Set dst (CmpF3 src1 (LoadF src2)));
14562 effect(KILL cr);
14563
14564 ins_cost(275);
14565 format %{ "ucomiss $src1, $src2\n\t"
14566 "movl $dst, #-1\n\t"
14567 "jp,s done\n\t"
14568 "jb,s done\n\t"
14569 "setne $dst\n\t"
14570 "movzbl $dst, $dst\n"
14571 "done:" %}
14572 ins_encode %{
14573 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14574 emit_cmpfp3(masm, $dst$$Register);
14575 %}
14576 ins_pipe(pipe_slow);
14577 %}
14578
14579 // Compare into -1,0,1
14580 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14581 match(Set dst (CmpF3 src con));
14582 effect(KILL cr);
14583
14584 ins_cost(275);
14585 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14586 "movl $dst, #-1\n\t"
14587 "jp,s done\n\t"
14588 "jb,s done\n\t"
14589 "setne $dst\n\t"
14590 "movzbl $dst, $dst\n"
14591 "done:" %}
14592 ins_encode %{
14593 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14594 emit_cmpfp3(masm, $dst$$Register);
14595 %}
14596 ins_pipe(pipe_slow);
14597 %}
14598
14599 // Compare into -1,0,1
14600 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14601 %{
14602 match(Set dst (CmpD3 src1 src2));
14603 effect(KILL cr);
14604
14605 ins_cost(275);
14606 format %{ "ucomisd $src1, $src2\n\t"
14607 "movl $dst, #-1\n\t"
14608 "jp,s done\n\t"
14609 "jb,s done\n\t"
14610 "setne $dst\n\t"
14611 "movzbl $dst, $dst\n"
14612 "done:" %}
14613 ins_encode %{
14614 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14615 emit_cmpfp3(masm, $dst$$Register);
14616 %}
14617 ins_pipe(pipe_slow);
14618 %}
14619
14620 // Compare into -1,0,1
14621 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14622 %{
14623 match(Set dst (CmpD3 src1 (LoadD src2)));
14624 effect(KILL cr);
14625
14626 ins_cost(275);
14627 format %{ "ucomisd $src1, $src2\n\t"
14628 "movl $dst, #-1\n\t"
14629 "jp,s done\n\t"
14630 "jb,s done\n\t"
14631 "setne $dst\n\t"
14632 "movzbl $dst, $dst\n"
14633 "done:" %}
14634 ins_encode %{
14635 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14636 emit_cmpfp3(masm, $dst$$Register);
14637 %}
14638 ins_pipe(pipe_slow);
14639 %}
14640
14641 // Compare into -1,0,1
14642 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14643 match(Set dst (CmpD3 src con));
14644 effect(KILL cr);
14645
14646 ins_cost(275);
14647 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14648 "movl $dst, #-1\n\t"
14649 "jp,s done\n\t"
14650 "jb,s done\n\t"
14651 "setne $dst\n\t"
14652 "movzbl $dst, $dst\n"
14653 "done:" %}
14654 ins_encode %{
14655 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14656 emit_cmpfp3(masm, $dst$$Register);
14657 %}
14658 ins_pipe(pipe_slow);
14659 %}
14660
14661 //----------Arithmetic Conversion Instructions---------------------------------
14662
14663 instruct convF2D_reg_reg(regD dst, regF src)
14664 %{
14665 match(Set dst (ConvF2D src));
14666
14667 format %{ "cvtss2sd $dst, $src" %}
14668 ins_encode %{
14669 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14670 %}
14671 ins_pipe(pipe_slow); // XXX
14672 %}
14673
14674 instruct convF2D_reg_mem(regD dst, memory src)
14675 %{
14676 predicate(UseAVX == 0);
14677 match(Set dst (ConvF2D (LoadF src)));
14678
14679 format %{ "cvtss2sd $dst, $src" %}
14680 ins_encode %{
14681 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14682 %}
14683 ins_pipe(pipe_slow); // XXX
14684 %}
14685
14686 instruct convD2F_reg_reg(regF dst, regD src)
14687 %{
14688 match(Set dst (ConvD2F src));
14689
14690 format %{ "cvtsd2ss $dst, $src" %}
14691 ins_encode %{
14692 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14693 %}
14694 ins_pipe(pipe_slow); // XXX
14695 %}
14696
14697 instruct convD2F_reg_mem(regF dst, memory src)
14698 %{
14699 predicate(UseAVX == 0);
14700 match(Set dst (ConvD2F (LoadD src)));
14701
14702 format %{ "cvtsd2ss $dst, $src" %}
14703 ins_encode %{
14704 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14705 %}
14706 ins_pipe(pipe_slow); // XXX
14707 %}
14708
14709 // XXX do mem variants
14710 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14711 %{
14712 predicate(!VM_Version::supports_avx10_2());
14713 match(Set dst (ConvF2I src));
14714 effect(KILL cr);
14715 format %{ "convert_f2i $dst, $src" %}
14716 ins_encode %{
14717 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14718 %}
14719 ins_pipe(pipe_slow);
14720 %}
14721
14722 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14723 %{
14724 predicate(VM_Version::supports_avx10_2());
14725 match(Set dst (ConvF2I src));
14726 format %{ "evcvttss2sisl $dst, $src" %}
14727 ins_encode %{
14728 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14729 %}
14730 ins_pipe(pipe_slow);
14731 %}
14732
14733 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14734 %{
14735 predicate(VM_Version::supports_avx10_2());
14736 match(Set dst (ConvF2I (LoadF src)));
14737 format %{ "evcvttss2sisl $dst, $src" %}
14738 ins_encode %{
14739 __ evcvttss2sisl($dst$$Register, $src$$Address);
14740 %}
14741 ins_pipe(pipe_slow);
14742 %}
14743
14744 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14745 %{
14746 predicate(!VM_Version::supports_avx10_2());
14747 match(Set dst (ConvF2L src));
14748 effect(KILL cr);
14749 format %{ "convert_f2l $dst, $src"%}
14750 ins_encode %{
14751 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14752 %}
14753 ins_pipe(pipe_slow);
14754 %}
14755
14756 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14757 %{
14758 predicate(VM_Version::supports_avx10_2());
14759 match(Set dst (ConvF2L src));
14760 format %{ "evcvttss2sisq $dst, $src" %}
14761 ins_encode %{
14762 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14763 %}
14764 ins_pipe(pipe_slow);
14765 %}
14766
14767 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14768 %{
14769 predicate(VM_Version::supports_avx10_2());
14770 match(Set dst (ConvF2L (LoadF src)));
14771 format %{ "evcvttss2sisq $dst, $src" %}
14772 ins_encode %{
14773 __ evcvttss2sisq($dst$$Register, $src$$Address);
14774 %}
14775 ins_pipe(pipe_slow);
14776 %}
14777
14778 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14779 %{
14780 predicate(!VM_Version::supports_avx10_2());
14781 match(Set dst (ConvD2I src));
14782 effect(KILL cr);
14783 format %{ "convert_d2i $dst, $src"%}
14784 ins_encode %{
14785 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14786 %}
14787 ins_pipe(pipe_slow);
14788 %}
14789
14790 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14791 %{
14792 predicate(VM_Version::supports_avx10_2());
14793 match(Set dst (ConvD2I src));
14794 format %{ "evcvttsd2sisl $dst, $src" %}
14795 ins_encode %{
14796 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14797 %}
14798 ins_pipe(pipe_slow);
14799 %}
14800
14801 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14802 %{
14803 predicate(VM_Version::supports_avx10_2());
14804 match(Set dst (ConvD2I (LoadD src)));
14805 format %{ "evcvttsd2sisl $dst, $src" %}
14806 ins_encode %{
14807 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14808 %}
14809 ins_pipe(pipe_slow);
14810 %}
14811
14812 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14813 %{
14814 predicate(!VM_Version::supports_avx10_2());
14815 match(Set dst (ConvD2L src));
14816 effect(KILL cr);
14817 format %{ "convert_d2l $dst, $src"%}
14818 ins_encode %{
14819 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14820 %}
14821 ins_pipe(pipe_slow);
14822 %}
14823
14824 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14825 %{
14826 predicate(VM_Version::supports_avx10_2());
14827 match(Set dst (ConvD2L src));
14828 format %{ "evcvttsd2sisq $dst, $src" %}
14829 ins_encode %{
14830 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14831 %}
14832 ins_pipe(pipe_slow);
14833 %}
14834
14835 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14836 %{
14837 predicate(VM_Version::supports_avx10_2());
14838 match(Set dst (ConvD2L (LoadD src)));
14839 format %{ "evcvttsd2sisq $dst, $src" %}
14840 ins_encode %{
14841 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14842 %}
14843 ins_pipe(pipe_slow);
14844 %}
14845
14846 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14847 %{
14848 match(Set dst (RoundD src));
14849 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14850 format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14851 ins_encode %{
14852 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14853 %}
14854 ins_pipe(pipe_slow);
14855 %}
14856
14857 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14858 %{
14859 match(Set dst (RoundF src));
14860 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14861 format %{ "round_float $dst,$src" %}
14862 ins_encode %{
14863 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14864 %}
14865 ins_pipe(pipe_slow);
14866 %}
14867
14868 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14869 %{
14870 predicate(!UseXmmI2F);
14871 match(Set dst (ConvI2F src));
14872
14873 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14874 ins_encode %{
14875 if (UseAVX > 0) {
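      // With AVX, cvtsi2ssl merges into the untouched upper lanes of $dst, so
      // clear $dst first to break the false dependency on its previous value.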
14876 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14877 }
14878 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14879 %}
14880 ins_pipe(pipe_slow); // XXX
14881 %}
14882
14883 instruct convI2F_reg_mem(regF dst, memory src)
14884 %{
14885 predicate(UseAVX == 0);
14886 match(Set dst (ConvI2F (LoadI src)));
14887
14888 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14889 ins_encode %{
14890 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14891 %}
14892 ins_pipe(pipe_slow); // XXX
14893 %}
14894
14895 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14896 %{
14897 predicate(!UseXmmI2D);
14898 match(Set dst (ConvI2D src));
14899
14900 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14901 ins_encode %{
14902 if (UseAVX > 0) {
14903 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14904 }
14905 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14906 %}
14907 ins_pipe(pipe_slow); // XXX
14908 %}
14909
14910 instruct convI2D_reg_mem(regD dst, memory src)
14911 %{
14912 predicate(UseAVX == 0);
14913 match(Set dst (ConvI2D (LoadI src)));
14914
14915 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14916 ins_encode %{
14917 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14918 %}
14919 ins_pipe(pipe_slow); // XXX
14920 %}
14921
14922 instruct convXI2F_reg(regF dst, rRegI src)
14923 %{
14924 predicate(UseXmmI2F);
14925 match(Set dst (ConvI2F src));
14926
14927 format %{ "movdl $dst, $src\n\t"
14928 "cvtdq2psl $dst, $dst\t# i2f" %}
14929 ins_encode %{
14930 __ movdl($dst$$XMMRegister, $src$$Register);
14931 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14932 %}
14933 ins_pipe(pipe_slow); // XXX
14934 %}
14935
14936 instruct convXI2D_reg(regD dst, rRegI src)
14937 %{
14938 predicate(UseXmmI2D);
14939 match(Set dst (ConvI2D src));
14940
14941 format %{ "movdl $dst, $src\n\t"
14942 "cvtdq2pdl $dst, $dst\t# i2d" %}
14943 ins_encode %{
14944 __ movdl($dst$$XMMRegister, $src$$Register);
14945 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14946 %}
14947 ins_pipe(pipe_slow); // XXX
14948 %}
14949
14950 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14951 %{
14952 match(Set dst (ConvL2F src));
14953
14954 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14955 ins_encode %{
14956 if (UseAVX > 0) {
14957 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14958 }
14959 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14960 %}
14961 ins_pipe(pipe_slow); // XXX
14962 %}
14963
14964 instruct convL2F_reg_mem(regF dst, memory src)
14965 %{
14966 predicate(UseAVX == 0);
14967 match(Set dst (ConvL2F (LoadL src)));
14968
14969 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14970 ins_encode %{
14971 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14972 %}
14973 ins_pipe(pipe_slow); // XXX
14974 %}
14975
14976 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14977 %{
14978 match(Set dst (ConvL2D src));
14979
14980 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14981 ins_encode %{
14982 if (UseAVX > 0) {
14983 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14984 }
14985 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14986 %}
14987 ins_pipe(pipe_slow); // XXX
14988 %}
14989
14990 instruct convL2D_reg_mem(regD dst, memory src)
14991 %{
14992 predicate(UseAVX == 0);
14993 match(Set dst (ConvL2D (LoadL src)));
14994
14995 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14996 ins_encode %{
14997 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14998 %}
14999 ins_pipe(pipe_slow); // XXX
15000 %}
15001
15002 instruct convI2L_reg_reg(rRegL dst, rRegI src)
15003 %{
15004 match(Set dst (ConvI2L src));
15005
15006 ins_cost(125);
15007 format %{ "movslq $dst, $src\t# i2l" %}
15008 ins_encode %{
15009 __ movslq($dst$$Register, $src$$Register);
15010 %}
15011 ins_pipe(ialu_reg_reg);
15012 %}
15013
15014 // Zero-extend convert int to long
15015 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
15016 %{
15017 match(Set dst (AndL (ConvI2L src) mask));
15018
15019 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
15020 ins_encode %{
15021 if ($dst$$reg != $src$$reg) {
15022 __ movl($dst$$Register, $src$$Register);
15023 }
15024 %}
15025 ins_pipe(ialu_reg_reg);
15026 %}
15027
15028 // Zero-extend convert int to long
15029 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15030 %{
15031 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15032
15033 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
15034 ins_encode %{
15035 __ movl($dst$$Register, $src$$Address);
15036 %}
15037 ins_pipe(ialu_reg_mem);
15038 %}
15039
15040 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
15041 %{
15042 match(Set dst (AndL src mask));
15043
15044 format %{ "movl $dst, $src\t# zero-extend long" %}
15045 ins_encode %{
15046 __ movl($dst$$Register, $src$$Register);
15047 %}
15048 ins_pipe(ialu_reg_reg);
15049 %}
15050
15051 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15052 %{
15053 match(Set dst (ConvL2I src));
15054
15055 format %{ "movl $dst, $src\t# l2i" %}
15056 ins_encode %{
15057 __ movl($dst$$Register, $src$$Register);
15058 %}
15059 ins_pipe(ialu_reg_reg);
15060 %}
15061
15062
15063 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15064 match(Set dst (MoveF2I src));
15065 effect(DEF dst, USE src);
15066
15067 ins_cost(125);
15068 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
15069 ins_encode %{
15070 __ movl($dst$$Register, Address(rsp, $src$$disp));
15071 %}
15072 ins_pipe(ialu_reg_mem);
15073 %}
15074
15075 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15076 match(Set dst (MoveI2F src));
15077 effect(DEF dst, USE src);
15078
15079 ins_cost(125);
15080 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
15081 ins_encode %{
15082 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15083 %}
15084 ins_pipe(pipe_slow);
15085 %}
15086
15087 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15088 match(Set dst (MoveD2L src));
15089 effect(DEF dst, USE src);
15090
15091 ins_cost(125);
15092 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
15093 ins_encode %{
15094 __ movq($dst$$Register, Address(rsp, $src$$disp));
15095 %}
15096 ins_pipe(ialu_reg_mem);
15097 %}
15098
15099 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15100 predicate(!UseXmmLoadAndClearUpper);
15101 match(Set dst (MoveL2D src));
15102 effect(DEF dst, USE src);
15103
15104 ins_cost(125);
15105 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
15106 ins_encode %{
15107 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15108 %}
15109 ins_pipe(pipe_slow);
15110 %}
15111
15112 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15113 predicate(UseXmmLoadAndClearUpper);
15114 match(Set dst (MoveL2D src));
15115 effect(DEF dst, USE src);
15116
15117 ins_cost(125);
15118 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
15119 ins_encode %{
15120 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15121 %}
15122 ins_pipe(pipe_slow);
15123 %}
15124
15125
15126 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15127 match(Set dst (MoveF2I src));
15128 effect(DEF dst, USE src);
15129
15130 ins_cost(95); // XXX
15131 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
15132 ins_encode %{
15133 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15134 %}
15135 ins_pipe(pipe_slow);
15136 %}
15137
15138 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15139 match(Set dst (MoveI2F src));
15140 effect(DEF dst, USE src);
15141
15142 ins_cost(100);
15143 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
15144 ins_encode %{
15145 __ movl(Address(rsp, $dst$$disp), $src$$Register);
15146 %}
15147 ins_pipe( ialu_mem_reg );
15148 %}
15149
15150 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15151 match(Set dst (MoveD2L src));
15152 effect(DEF dst, USE src);
15153
15154 ins_cost(95); // XXX
15155 format %{ "movsd $dst, $src\t# MoveL2D_reg_stack" %}
15156 ins_encode %{
15157 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15158 %}
15159 ins_pipe(pipe_slow);
15160 %}
15161
15162 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15163 match(Set dst (MoveL2D src));
15164 effect(DEF dst, USE src);
15165
15166 ins_cost(100);
15167 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
15168 ins_encode %{
15169 __ movq(Address(rsp, $dst$$disp), $src$$Register);
15170 %}
15171 ins_pipe(ialu_mem_reg);
15172 %}
15173
15174 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15175 match(Set dst (MoveF2I src));
15176 effect(DEF dst, USE src);
15177 ins_cost(85);
15178 format %{ "movd $dst,$src\t# MoveF2I" %}
15179 ins_encode %{
15180 __ movdl($dst$$Register, $src$$XMMRegister);
15181 %}
15182 ins_pipe( pipe_slow );
15183 %}
15184
15185 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15186 match(Set dst (MoveD2L src));
15187 effect(DEF dst, USE src);
15188 ins_cost(85);
15189 format %{ "movd $dst,$src\t# MoveD2L" %}
15190 ins_encode %{
15191 __ movdq($dst$$Register, $src$$XMMRegister);
15192 %}
15193 ins_pipe( pipe_slow );
15194 %}
15195
15196 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15197 match(Set dst (MoveI2F src));
15198 effect(DEF dst, USE src);
15199 ins_cost(100);
15200 format %{ "movd $dst,$src\t# MoveI2F" %}
15201 ins_encode %{
15202 __ movdl($dst$$XMMRegister, $src$$Register);
15203 %}
15204 ins_pipe( pipe_slow );
15205 %}
15206
15207 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15208 match(Set dst (MoveL2D src));
15209 effect(DEF dst, USE src);
15210 ins_cost(100);
15211 format %{ "movd $dst,$src\t# MoveL2D" %}
15212 ins_encode %{
15213 __ movdq($dst$$XMMRegister, $src$$Register);
15214 %}
15215 ins_pipe( pipe_slow );
15216 %}
15217
15218 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15220 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15221 Universe dummy, rFlagsReg cr)
15222 %{
15223 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15224 match(Set dummy (ClearArray cnt base));
15225 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15226
15227 format %{ $$template
15228 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15229 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15230 $$emit$$"jg LARGE\n\t"
15231 $$emit$$"dec rcx\n\t"
15232 $$emit$$"js DONE\t# Zero length\n\t"
15233 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15234 $$emit$$"dec rcx\n\t"
15235 $$emit$$"jge LOOP\n\t"
15236 $$emit$$"jmp DONE\n\t"
15237 $$emit$$"# LARGE:\n\t"
15238 if (UseFastStosb) {
15239 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15240 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15241 } else if (UseXMMForObjInit) {
15242 $$emit$$"mov rdi,rax\n\t"
15243 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15244 $$emit$$"jmpq L_zero_64_bytes\n\t"
15245 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15246 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15247 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15248 $$emit$$"add 0x40,rax\n\t"
15249 $$emit$$"# L_zero_64_bytes:\n\t"
15250 $$emit$$"sub 0x8,rcx\n\t"
15251 $$emit$$"jge L_loop\n\t"
15252 $$emit$$"add 0x4,rcx\n\t"
15253 $$emit$$"jl L_tail\n\t"
15254 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15255 $$emit$$"add 0x20,rax\n\t"
15256 $$emit$$"sub 0x4,rcx\n\t"
15257 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15258 $$emit$$"add 0x4,rcx\n\t"
15259 $$emit$$"jle L_end\n\t"
15260 $$emit$$"dec rcx\n\t"
15261 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15262 $$emit$$"vmovq xmm0,(rax)\n\t"
15263 $$emit$$"add 0x8,rax\n\t"
15264 $$emit$$"dec rcx\n\t"
15265 $$emit$$"jge L_sloop\n\t"
15266 $$emit$$"# L_end:\n\t"
15267 } else {
15268 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15269 }
15270 $$emit$$"# DONE"
15271 %}
15272 ins_encode %{
15273 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15274 $tmp$$XMMRegister, false, knoreg);
15275 %}
15276 ins_pipe(pipe_slow);
15277 %}
15278
15279 // Small non-constant length ClearArray for AVX512 targets.
15280 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15281 Universe dummy, rFlagsReg cr)
15282 %{
15283 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15284 match(Set dummy (ClearArray cnt base));
15285 ins_cost(125);
15286 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15287
15288 format %{ $$template
15289 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15290 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15291 $$emit$$"jg LARGE\n\t"
15292 $$emit$$"dec rcx\n\t"
15293 $$emit$$"js DONE\t# Zero length\n\t"
15294 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15295 $$emit$$"dec rcx\n\t"
15296 $$emit$$"jge LOOP\n\t"
15297 $$emit$$"jmp DONE\n\t"
15298 $$emit$$"# LARGE:\n\t"
15299 if (UseFastStosb) {
15300 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15301 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15302 } else if (UseXMMForObjInit) {
15303 $$emit$$"mov rdi,rax\n\t"
15304 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15305 $$emit$$"jmpq L_zero_64_bytes\n\t"
15306 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15307 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15308 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15309 $$emit$$"add 0x40,rax\n\t"
15310 $$emit$$"# L_zero_64_bytes:\n\t"
15311 $$emit$$"sub 0x8,rcx\n\t"
15312 $$emit$$"jge L_loop\n\t"
15313 $$emit$$"add 0x4,rcx\n\t"
15314 $$emit$$"jl L_tail\n\t"
15315 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15316 $$emit$$"add 0x20,rax\n\t"
15317 $$emit$$"sub 0x4,rcx\n\t"
15318 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15319 $$emit$$"add 0x4,rcx\n\t"
15320 $$emit$$"jle L_end\n\t"
15321 $$emit$$"dec rcx\n\t"
15322 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15323 $$emit$$"vmovq xmm0,(rax)\n\t"
15324 $$emit$$"add 0x8,rax\n\t"
15325 $$emit$$"dec rcx\n\t"
15326 $$emit$$"jge L_sloop\n\t"
15327 $$emit$$"# L_end:\n\t"
15328 } else {
15329 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15330 }
15331 $$emit$$"# DONE"
15332 %}
15333 ins_encode %{
15334 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15335 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15336 %}
15337 ins_pipe(pipe_slow);
15338 %}
15339
15340 // Large non-constant length ClearArray for non-AVX512 targets.
15341 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15342 Universe dummy, rFlagsReg cr)
15343 %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15345 match(Set dummy (ClearArray cnt base));
15346 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15347
15348 format %{ $$template
15349 if (UseFastStosb) {
15350 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15351 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15352 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15353 } else if (UseXMMForObjInit) {
15354 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15355 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15356 $$emit$$"jmpq L_zero_64_bytes\n\t"
15357 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15358 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15359 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15360 $$emit$$"add 0x40,rax\n\t"
15361 $$emit$$"# L_zero_64_bytes:\n\t"
15362 $$emit$$"sub 0x8,rcx\n\t"
15363 $$emit$$"jge L_loop\n\t"
15364 $$emit$$"add 0x4,rcx\n\t"
15365 $$emit$$"jl L_tail\n\t"
15366 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15367 $$emit$$"add 0x20,rax\n\t"
15368 $$emit$$"sub 0x4,rcx\n\t"
15369 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15370 $$emit$$"add 0x4,rcx\n\t"
15371 $$emit$$"jle L_end\n\t"
15372 $$emit$$"dec rcx\n\t"
15373 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15374 $$emit$$"vmovq xmm0,(rax)\n\t"
15375 $$emit$$"add 0x8,rax\n\t"
15376 $$emit$$"dec rcx\n\t"
15377 $$emit$$"jge L_sloop\n\t"
15378 $$emit$$"# L_end:\n\t"
15379 } else {
15380 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15381 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15382 }
15383 %}
15384 ins_encode %{
15385 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15386 $tmp$$XMMRegister, true, knoreg);
15387 %}
15388 ins_pipe(pipe_slow);
15389 %}
15390
15391 // Large non-constant length ClearArray for AVX512 targets.
15392 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15393 Universe dummy, rFlagsReg cr)
15394 %{
15395 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15396 match(Set dummy (ClearArray cnt base));
15397 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15398
15399 format %{ $$template
15400 if (UseFastStosb) {
15401 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15402 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15403 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15404 } else if (UseXMMForObjInit) {
15405 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15406 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15407 $$emit$$"jmpq L_zero_64_bytes\n\t"
15408 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15409 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15410 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15411 $$emit$$"add 0x40,rax\n\t"
15412 $$emit$$"# L_zero_64_bytes:\n\t"
15413 $$emit$$"sub 0x8,rcx\n\t"
15414 $$emit$$"jge L_loop\n\t"
15415 $$emit$$"add 0x4,rcx\n\t"
15416 $$emit$$"jl L_tail\n\t"
15417 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15418 $$emit$$"add 0x20,rax\n\t"
15419 $$emit$$"sub 0x4,rcx\n\t"
15420 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15421 $$emit$$"add 0x4,rcx\n\t"
15422 $$emit$$"jle L_end\n\t"
15423 $$emit$$"dec rcx\n\t"
15424 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15425 $$emit$$"vmovq xmm0,(rax)\n\t"
15426 $$emit$$"add 0x8,rax\n\t"
15427 $$emit$$"dec rcx\n\t"
15428 $$emit$$"jge L_sloop\n\t"
15429 $$emit$$"# L_end:\n\t"
15430 } else {
15431 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15432 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15433 }
15434 %}
15435 ins_encode %{
15436 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15437 $tmp$$XMMRegister, true, $ktmp$$KRegister);
15438 %}
15439 ins_pipe(pipe_slow);
15440 %}
15441
15442 // Small constant length ClearArray for AVX512 targets.
15443 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15444 %{
15445 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15446 match(Set dummy (ClearArray cnt base));
15447 ins_cost(100);
15448 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15449 format %{ "clear_mem_imm $base , $cnt \n\t" %}
15450 ins_encode %{
15451 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15452 %}
15453 ins_pipe(pipe_slow);
15454 %}
15455
15456 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15457 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15458 %{
15459 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15460 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15461 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15462
15463 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15464 ins_encode %{
15465 __ string_compare($str1$$Register, $str2$$Register,
15466 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15467 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15468 %}
15469 ins_pipe( pipe_slow );
15470 %}
15471
15472 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15473 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15474 %{
15475 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15476 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15477 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15478
15479 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15480 ins_encode %{
15481 __ string_compare($str1$$Register, $str2$$Register,
15482 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15483 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15484 %}
15485 ins_pipe( pipe_slow );
15486 %}
15487
15488 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15489 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15490 %{
15491 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15492 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15493 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15494
15495 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15496 ins_encode %{
15497 __ string_compare($str1$$Register, $str2$$Register,
15498 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15499 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15500 %}
15501 ins_pipe( pipe_slow );
15502 %}
15503
15504 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15505 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15506 %{
15507 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15508 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15509 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15510
15511 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15512 ins_encode %{
15513 __ string_compare($str1$$Register, $str2$$Register,
15514 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15515 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15516 %}
15517 ins_pipe( pipe_slow );
15518 %}
15519
15520 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15521 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15522 %{
15523 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15524 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15525 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15526
15527 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15528 ins_encode %{
15529 __ string_compare($str1$$Register, $str2$$Register,
15530 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15531 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15532 %}
15533 ins_pipe( pipe_slow );
15534 %}
15535
15536 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15537 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15538 %{
15539 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15540 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15541 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15542
15543 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15544 ins_encode %{
15545 __ string_compare($str1$$Register, $str2$$Register,
15546 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15547 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15548 %}
15549 ins_pipe( pipe_slow );
15550 %}
15551
15552 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15553 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15554 %{
15555 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15556 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15557 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15558
15559 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15560 ins_encode %{
15561 __ string_compare($str2$$Register, $str1$$Register,
15562 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15563 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15564 %}
15565 ins_pipe( pipe_slow );
15566 %}
15567
15568 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15569 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15570 %{
15571 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15572 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15573 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15574
15575 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15576 ins_encode %{
15577 __ string_compare($str2$$Register, $str1$$Register,
15578 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15579 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15580 %}
15581 ins_pipe( pipe_slow );
15582 %}
15583
15584 // fast search of substring with known size.
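// "Known size" means the substring length is the compile-time constant
// int_cnt2. Substrings filling at least one 16-byte SSE4.2 chunk (16 Latin-1
// bytes, or 8 UTF-16 chars) take the string_indexofC8 path and never need to
// be staged on the stack; shorter substrings may be copied through the stack
// when they would otherwise straddle a page boundary (see the branches below).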
15585 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15586 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15587 %{
15588 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15589 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15590 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15591
15592 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15593 ins_encode %{
15594 int icnt2 = (int)$int_cnt2$$constant;
15595 if (icnt2 >= 16) {
15596 // IndexOf for constant substrings with size >= 16 elements
15597 // which don't need to be loaded through stack.
15598 __ string_indexofC8($str1$$Register, $str2$$Register,
15599 $cnt1$$Register, $cnt2$$Register,
15600 icnt2, $result$$Register,
15601 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15602 } else {
15603 // Small strings are loaded through stack if they cross page boundary.
15604 __ string_indexof($str1$$Register, $str2$$Register,
15605 $cnt1$$Register, $cnt2$$Register,
15606 icnt2, $result$$Register,
15607 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15608 }
15609 %}
15610 ins_pipe( pipe_slow );
15611 %}
15612
15613 // fast search of substring with known size.
15614 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15615 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15616 %{
15617 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15618 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15619 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15620
15621 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15622 ins_encode %{
15623 int icnt2 = (int)$int_cnt2$$constant;
15624 if (icnt2 >= 8) {
15625 // IndexOf for constant substrings with size >= 8 elements
15626 // which don't need to be loaded through stack.
15627 __ string_indexofC8($str1$$Register, $str2$$Register,
15628 $cnt1$$Register, $cnt2$$Register,
15629 icnt2, $result$$Register,
15630 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15631 } else {
15632 // Small strings are loaded through stack if they cross page boundary.
15633 __ string_indexof($str1$$Register, $str2$$Register,
15634 $cnt1$$Register, $cnt2$$Register,
15635 icnt2, $result$$Register,
15636 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15637 }
15638 %}
15639 ins_pipe( pipe_slow );
15640 %}
15641
15642 // fast search of substring with known size.
15643 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15644 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15645 %{
15646 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15647 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15648 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15649
15650 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15651 ins_encode %{
15652 int icnt2 = (int)$int_cnt2$$constant;
15653 if (icnt2 >= 8) {
15654 // IndexOf for constant substrings with size >= 8 elements
15655 // which don't need to be loaded through stack.
15656 __ string_indexofC8($str1$$Register, $str2$$Register,
15657 $cnt1$$Register, $cnt2$$Register,
15658 icnt2, $result$$Register,
15659 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15660 } else {
15661 // Small strings are loaded through stack if they cross page boundary.
15662 __ string_indexof($str1$$Register, $str2$$Register,
15663 $cnt1$$Register, $cnt2$$Register,
15664 icnt2, $result$$Register,
15665 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15666 }
15667 %}
15668 ins_pipe( pipe_slow );
15669 %}
15670
15671 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15672 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15673 %{
15674 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15675 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15676 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15677
15678 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15679 ins_encode %{
15680 __ string_indexof($str1$$Register, $str2$$Register,
15681 $cnt1$$Register, $cnt2$$Register,
15682 (-1), $result$$Register,
15683 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15684 %}
15685 ins_pipe( pipe_slow );
15686 %}
15687
15688 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15689 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15690 %{
15691 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15692 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15693 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15694
15695 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15696 ins_encode %{
15697 __ string_indexof($str1$$Register, $str2$$Register,
15698 $cnt1$$Register, $cnt2$$Register,
15699 (-1), $result$$Register,
15700 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15701 %}
15702 ins_pipe( pipe_slow );
15703 %}
15704
15705 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15706 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15707 %{
15708 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15709 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15710 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15711
15712 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15713 ins_encode %{
15714 __ string_indexof($str1$$Register, $str2$$Register,
15715 $cnt1$$Register, $cnt2$$Register,
15716 (-1), $result$$Register,
15717 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15718 %}
15719 ins_pipe( pipe_slow );
15720 %}
15721
15722 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15723 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15724 %{
15725 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15726 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15727 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15728 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15729 ins_encode %{
15730 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15731 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15732 %}
15733 ins_pipe( pipe_slow );
15734 %}
15735
15736 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15737 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15738 %{
15739 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15740 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15741 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15742 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15743 ins_encode %{
15744 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15745 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15746 %}
15747 ins_pipe( pipe_slow );
15748 %}
15749
15750 // fast string equals
15751 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15752 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15753 %{
15754 predicate(!VM_Version::supports_avx512vlbw());
15755 match(Set result (StrEquals (Binary str1 str2) cnt));
15756 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15757
15758 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15759 ins_encode %{
15760 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15761 $cnt$$Register, $result$$Register, $tmp3$$Register,
15762 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15763 %}
15764 ins_pipe( pipe_slow );
15765 %}
15766
15767 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15768 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15769 %{
15770 predicate(VM_Version::supports_avx512vlbw());
15771 match(Set result (StrEquals (Binary str1 str2) cnt));
15772 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15773
15774 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15775 ins_encode %{
15776 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15777 $cnt$$Register, $result$$Register, $tmp3$$Register,
15778 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15779 %}
15780 ins_pipe( pipe_slow );
15781 %}
15782
15783 // fast array equals
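// The LL variants compare byte[] elements and the UU variants compare char[]
// elements; the only difference in the emitted code is the element-size flag
// passed to arrays_equals (false = byte, true = char).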
15784 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15785 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15786 %{
15787 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15788 match(Set result (AryEq ary1 ary2));
15789 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15790
15791 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15792 ins_encode %{
15793 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15794 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15795 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15796 %}
15797 ins_pipe( pipe_slow );
15798 %}
15799
15800 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15801 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15802 %{
15803 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15804 match(Set result (AryEq ary1 ary2));
15805 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15806
15807 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15808 ins_encode %{
15809 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15810 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15811 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15812 %}
15813 ins_pipe( pipe_slow );
15814 %}
15815
15816 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15817 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15818 %{
15819 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15820 match(Set result (AryEq ary1 ary2));
15821 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15822
15823 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15824 ins_encode %{
15825 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15826 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15827 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15828 %}
15829 ins_pipe( pipe_slow );
15830 %}
15831
15832 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15833 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15834 %{
15835 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15836 match(Set result (AryEq ary1 ary2));
15837 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15838
15839 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15840 ins_encode %{
15841 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15842 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15843 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15844 %}
15845 ins_pipe( pipe_slow );
15846 %}
15847
15848 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15849 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15850 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15851 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15852 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15853 %{
15854 predicate(UseAVX >= 2);
15855 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15856 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15857 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15858 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15859 USE basic_type, KILL cr);
15860
15861 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
15862 ins_encode %{
15863 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15864 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15865 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15866 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15867 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15868 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15869 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15870 %}
15871 ins_pipe( pipe_slow );
15872 %}
15873
15874 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15875 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15876 %{
15877 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15878 match(Set result (CountPositives ary1 len));
15879 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15880
15881 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15882 ins_encode %{
15883 __ count_positives($ary1$$Register, $len$$Register,
15884 $result$$Register, $tmp3$$Register,
15885 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15886 %}
15887 ins_pipe( pipe_slow );
15888 %}
15889
15890 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15891 legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15892 %{
15893 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15894 match(Set result (CountPositives ary1 len));
15895 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15896
15897 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15898 ins_encode %{
15899 __ count_positives($ary1$$Register, $len$$Register,
15900 $result$$Register, $tmp3$$Register,
15901 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15902 %}
15903 ins_pipe( pipe_slow );
15904 %}
15905
15906 // fast char[] to byte[] compression
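// Compression succeeds only when every char fits in a single byte (Latin-1);
// the outcome is reported through $result by the char_array_compress routine
// (see the macro assembler for the exact contract).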
15907 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15908 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15909 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15910 match(Set result (StrCompressedCopy src (Binary dst len)));
15911 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15912 USE_KILL len, KILL tmp5, KILL cr);
15913
15914 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15915 ins_encode %{
15916 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15917 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15918 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15919 knoreg, knoreg);
15920 %}
15921 ins_pipe( pipe_slow );
15922 %}
15923
15924 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15925 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15926 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15927 match(Set result (StrCompressedCopy src (Binary dst len)));
15928 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15929 USE_KILL len, KILL tmp5, KILL cr);
15930
15931 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15932 ins_encode %{
15933 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15934 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15935 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15936 $ktmp1$$KRegister, $ktmp2$$KRegister);
15937 %}
15938 ins_pipe( pipe_slow );
15939 %}
15940 // fast byte[] to char[] inflation
15941 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15942 legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15943 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15944 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15945 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15946
15947 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15948 ins_encode %{
15949 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15950 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15951 %}
15952 ins_pipe( pipe_slow );
15953 %}
15954
15955 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15956 legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15957 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15958 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15959 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15960
15961 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15962 ins_encode %{
15963 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15964 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15965 %}
15966 ins_pipe( pipe_slow );
15967 %}
15968
15969 // encode char[] to byte[] in ISO_8859_1
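// Note: this instruct and encode_ascii_array below share one macro-assembler
// routine, encode_iso_array; the trailing boolean selects the stricter ASCII
// range check (true) versus ISO-8859-1 (false).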
15970 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15971 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15972 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15973 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15974 match(Set result (EncodeISOArray src (Binary dst len)));
15975 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15976
15977 format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15978 ins_encode %{
15979 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15980 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15981 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15982 %}
15983 ins_pipe( pipe_slow );
15984 %}
15985
15986 // encode char[] to byte[] in ASCII
15987 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15988 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15989 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15990 predicate(((EncodeISOArrayNode*)n)->is_ascii());
15991 match(Set result (EncodeISOArray src (Binary dst len)));
15992 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15993
15994 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15995 ins_encode %{
15996 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15997 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15998 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15999 %}
16000 ins_pipe( pipe_slow );
16001 %}
16002
16003 //----------Overflow Math Instructions-----------------------------------------
16004
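// These instructs only set the flags register for the ideal Overflow* nodes;
// the matched consumer (a branch on the overflow condition) does the actual
// jump. A minimal sketch of the expected pairing, assuming the node came from
// a Math.addExact-style check:
//
//   addl  rax, rbx       // overflowAddI_rReg: performs the add, sets OF
//   jo    slow_path      // consumer branches on the overflow flag
//
// The add, neg and imul forms USE_KILL or TEMP a register because the
// underlying instruction overwrites its destination even though only the
// flags result is wanted; the sub forms can use a plain cmp instead.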
16005 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16006 %{
16007 match(Set cr (OverflowAddI op1 op2));
16008 effect(DEF cr, USE_KILL op1, USE op2);
16009
16010 format %{ "addl $op1, $op2\t# overflow check int" %}
16011
16012 ins_encode %{
16013 __ addl($op1$$Register, $op2$$Register);
16014 %}
16015 ins_pipe(ialu_reg_reg);
16016 %}
16017
16018 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16019 %{
16020 match(Set cr (OverflowAddI op1 op2));
16021 effect(DEF cr, USE_KILL op1, USE op2);
16022
16023 format %{ "addl $op1, $op2\t# overflow check int" %}
16024
16025 ins_encode %{
16026 __ addl($op1$$Register, $op2$$constant);
16027 %}
16028 ins_pipe(ialu_reg_reg);
16029 %}
16030
16031 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16032 %{
16033 match(Set cr (OverflowAddL op1 op2));
16034 effect(DEF cr, USE_KILL op1, USE op2);
16035
16036 format %{ "addq $op1, $op2\t# overflow check long" %}
16037 ins_encode %{
16038 __ addq($op1$$Register, $op2$$Register);
16039 %}
16040 ins_pipe(ialu_reg_reg);
16041 %}
16042
16043 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16044 %{
16045 match(Set cr (OverflowAddL op1 op2));
16046 effect(DEF cr, USE_KILL op1, USE op2);
16047
16048 format %{ "addq $op1, $op2\t# overflow check long" %}
16049 ins_encode %{
16050 __ addq($op1$$Register, $op2$$constant);
16051 %}
16052 ins_pipe(ialu_reg_reg);
16053 %}
16054
16055 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16056 %{
16057 match(Set cr (OverflowSubI op1 op2));
16058
16059 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16060 ins_encode %{
16061 __ cmpl($op1$$Register, $op2$$Register);
16062 %}
16063 ins_pipe(ialu_reg_reg);
16064 %}
16065
16066 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16067 %{
16068 match(Set cr (OverflowSubI op1 op2));
16069
16070 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16071 ins_encode %{
16072 __ cmpl($op1$$Register, $op2$$constant);
16073 %}
16074 ins_pipe(ialu_reg_reg);
16075 %}
16076
16077 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16078 %{
16079 match(Set cr (OverflowSubL op1 op2));
16080
16081 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16082 ins_encode %{
16083 __ cmpq($op1$$Register, $op2$$Register);
16084 %}
16085 ins_pipe(ialu_reg_reg);
16086 %}
16087
16088 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16089 %{
16090 match(Set cr (OverflowSubL op1 op2));
16091
16092 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16093 ins_encode %{
16094 __ cmpq($op1$$Register, $op2$$constant);
16095 %}
16096 ins_pipe(ialu_reg_reg);
16097 %}
16098
16099 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16100 %{
16101 match(Set cr (OverflowSubI zero op2));
16102 effect(DEF cr, USE_KILL op2);
16103
16104 format %{ "negl $op2\t# overflow check int" %}
16105 ins_encode %{
16106 __ negl($op2$$Register);
16107 %}
16108 ins_pipe(ialu_reg_reg);
16109 %}
16110
16111 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16112 %{
16113 match(Set cr (OverflowSubL zero op2));
16114 effect(DEF cr, USE_KILL op2);
16115
16116 format %{ "negq $op2\t# overflow check long" %}
16117 ins_encode %{
16118 __ negq($op2$$Register);
16119 %}
16120 ins_pipe(ialu_reg_reg);
16121 %}
16122
16123 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16124 %{
16125 match(Set cr (OverflowMulI op1 op2));
16126 effect(DEF cr, USE_KILL op1, USE op2);
16127
16128 format %{ "imull $op1, $op2\t# overflow check int" %}
16129 ins_encode %{
16130 __ imull($op1$$Register, $op2$$Register);
16131 %}
16132 ins_pipe(ialu_reg_reg_alu0);
16133 %}
16134
16135 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16136 %{
16137 match(Set cr (OverflowMulI op1 op2));
16138 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16139
16140 format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
16141 ins_encode %{
16142 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16143 %}
16144 ins_pipe(ialu_reg_reg_alu0);
16145 %}
16146
16147 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16148 %{
16149 match(Set cr (OverflowMulL op1 op2));
16150 effect(DEF cr, USE_KILL op1, USE op2);
16151
16152 format %{ "imulq $op1, $op2\t# overflow check long" %}
16153 ins_encode %{
16154 __ imulq($op1$$Register, $op2$$Register);
16155 %}
16156 ins_pipe(ialu_reg_reg_alu0);
16157 %}
16158
16159 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16160 %{
16161 match(Set cr (OverflowMulL op1 op2));
16162 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16163
16164 format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
16165 ins_encode %{
16166 __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16167 %}
16168 ins_pipe(ialu_reg_reg_alu0);
16169 %}
16170
16171
16172 //----------Control Flow Instructions------------------------------------------
16173 // Signed compare Instructions
16174
16175 // XXX more variants!!
16176 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16177 %{
16178 match(Set cr (CmpI op1 op2));
16179 effect(DEF cr, USE op1, USE op2);
16180
16181 format %{ "cmpl $op1, $op2" %}
16182 ins_encode %{
16183 __ cmpl($op1$$Register, $op2$$Register);
16184 %}
16185 ins_pipe(ialu_cr_reg_reg);
16186 %}
16187
16188 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16189 %{
16190 match(Set cr (CmpI op1 op2));
16191
16192 format %{ "cmpl $op1, $op2" %}
16193 ins_encode %{
16194 __ cmpl($op1$$Register, $op2$$constant);
16195 %}
16196 ins_pipe(ialu_cr_reg_imm);
16197 %}
16198
16199 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16200 %{
16201 match(Set cr (CmpI op1 (LoadI op2)));
16202
16203 ins_cost(500); // XXX
16204 format %{ "cmpl $op1, $op2" %}
16205 ins_encode %{
16206 __ cmpl($op1$$Register, $op2$$Address);
16207 %}
16208 ins_pipe(ialu_cr_reg_mem);
16209 %}
16210
16211 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16212 %{
16213 match(Set cr (CmpI src zero));
16214
16215 format %{ "testl $src, $src" %}
16216 ins_encode %{
16217 __ testl($src$$Register, $src$$Register);
16218 %}
16219 ins_pipe(ialu_cr_reg_imm);
16220 %}
16221
16222 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16223 %{
16224 match(Set cr (CmpI (AndI src con) zero));
16225
16226 format %{ "testl $src, $con" %}
16227 ins_encode %{
16228 __ testl($src$$Register, $con$$constant);
16229 %}
16230 ins_pipe(ialu_cr_reg_imm);
16231 %}
16232
16233 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16234 %{
16235 match(Set cr (CmpI (AndI src1 src2) zero));
16236
16237 format %{ "testl $src1, $src2" %}
16238 ins_encode %{
16239 __ testl($src1$$Register, $src2$$Register);
16240 %}
16241 ins_pipe(ialu_cr_reg_imm);
16242 %}
16243
16244 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16245 %{
16246 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16247
16248 format %{ "testl $src, $mem" %}
16249 ins_encode %{
16250 __ testl($src$$Register, $mem$$Address);
16251 %}
16252 ins_pipe(ialu_cr_reg_mem);
16253 %}
16254
16255 // Unsigned compare Instructions; really, same as signed except they
16256 // produce an rFlagsRegU instead of rFlagsReg.
16257 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16258 %{
16259 match(Set cr (CmpU op1 op2));
16260
16261 format %{ "cmpl $op1, $op2\t# unsigned" %}
16262 ins_encode %{
16263 __ cmpl($op1$$Register, $op2$$Register);
16264 %}
16265 ins_pipe(ialu_cr_reg_reg);
16266 %}
16267
16268 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16269 %{
16270 match(Set cr (CmpU op1 op2));
16271
16272 format %{ "cmpl $op1, $op2\t# unsigned" %}
16273 ins_encode %{
16274 __ cmpl($op1$$Register, $op2$$constant);
16275 %}
16276 ins_pipe(ialu_cr_reg_imm);
16277 %}
16278
16279 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16280 %{
16281 match(Set cr (CmpU op1 (LoadI op2)));
16282
16283 ins_cost(500); // XXX
16284 format %{ "cmpl $op1, $op2\t# unsigned" %}
16285 ins_encode %{
16286 __ cmpl($op1$$Register, $op2$$Address);
16287 %}
16288 ins_pipe(ialu_cr_reg_mem);
16289 %}
16290
16291 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16292 %{
16293 match(Set cr (CmpU src zero));
16294
16295 format %{ "testl $src, $src\t# unsigned" %}
16296 ins_encode %{
16297 __ testl($src$$Register, $src$$Register);
16298 %}
16299 ins_pipe(ialu_cr_reg_imm);
16300 %}
16301
16302 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16303 %{
16304 match(Set cr (CmpP op1 op2));
16305
16306 format %{ "cmpq $op1, $op2\t# ptr" %}
16307 ins_encode %{
16308 __ cmpq($op1$$Register, $op2$$Register);
16309 %}
16310 ins_pipe(ialu_cr_reg_reg);
16311 %}
16312
16313 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16314 %{
16315 match(Set cr (CmpP op1 (LoadP op2)));
16316 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16317
16318 ins_cost(500); // XXX
16319 format %{ "cmpq $op1, $op2\t# ptr" %}
16320 ins_encode %{
16321 __ cmpq($op1$$Register, $op2$$Address);
16322 %}
16323 ins_pipe(ialu_cr_reg_mem);
16324 %}
16325
16326 // XXX this is generalized by compP_rReg_mem???
16327 // Compare raw pointer (used in out-of-heap check).
16328 // Only works because non-oop pointers must be raw pointers
16329 // and raw pointers have no anti-dependencies.
16330 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16331 %{
16332 predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16333 n->in(2)->as_Load()->barrier_data() == 0);
16334 match(Set cr (CmpP op1 (LoadP op2)));
16335
16336 format %{ "cmpq $op1, $op2\t# raw ptr" %}
16337 ins_encode %{
16338 __ cmpq($op1$$Register, $op2$$Address);
16339 %}
16340 ins_pipe(ialu_cr_reg_mem);
16341 %}
16342
16343 // This will generate a signed flags result. This should be OK since
16344 // any compare to a zero should be eq/neq.
16345 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16346 %{
16347 match(Set cr (CmpP src zero));
16348
16349 format %{ "testq $src, $src\t# ptr" %}
16350 ins_encode %{
16351 __ testq($src$$Register, $src$$Register);
16352 %}
16353 ins_pipe(ialu_cr_reg_imm);
16354 %}
16355
16356 // This will generate a signed flags result. This should be OK since
16357 // any compare to a zero should be eq/neq.
16358 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16359 %{
16360 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16361 n->in(1)->as_Load()->barrier_data() == 0);
16362 match(Set cr (CmpP (LoadP op) zero));
16363
16364 ins_cost(500); // XXX
16365 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
16366 ins_encode %{
16367 __ testq($op$$Address, 0xFFFFFFFF);
16368 %}
16369 ins_pipe(ialu_cr_reg_imm);
16370 %}
16371
16372 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16373 %{
16374 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16375 n->in(1)->as_Load()->barrier_data() == 0);
16376 match(Set cr (CmpP (LoadP mem) zero));
16377
16378 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
16379 ins_encode %{
16380 __ cmpq(r12, $mem$$Address);
16381 %}
16382 ins_pipe(ialu_cr_reg_mem);
16383 %}
16384
16385 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16386 %{
16387 match(Set cr (CmpN op1 op2));
16388
16389 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16390 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16391 ins_pipe(ialu_cr_reg_reg);
16392 %}
16393
16394 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16395 %{
16396 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16397 match(Set cr (CmpN src (LoadN mem)));
16398
16399 format %{ "cmpl $src, $mem\t# compressed ptr" %}
16400 ins_encode %{
16401 __ cmpl($src$$Register, $mem$$Address);
16402 %}
16403 ins_pipe(ialu_cr_reg_mem);
16404 %}
16405
16406 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16407 match(Set cr (CmpN op1 op2));
16408
16409 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16410 ins_encode %{
16411 __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16412 %}
16413 ins_pipe(ialu_cr_reg_imm);
16414 %}
16415
16416 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16417 %{
16418 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16419 match(Set cr (CmpN src (LoadN mem)));
16420
16421 format %{ "cmpl $mem, $src\t# compressed ptr" %}
16422 ins_encode %{
16423 __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16424 %}
16425 ins_pipe(ialu_cr_reg_mem);
16426 %}
16427
16428 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16429 match(Set cr (CmpN op1 op2));
16430
16431 format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
16432 ins_encode %{
16433 __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16434 %}
16435 ins_pipe(ialu_cr_reg_imm);
16436 %}
16437
16438 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16439 %{
16440 predicate(!UseCompactObjectHeaders);
16441 match(Set cr (CmpN src (LoadNKlass mem)));
16442
16443 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
16444 ins_encode %{
16445 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16446 %}
16447 ins_pipe(ialu_cr_reg_mem);
16448 %}
16449
16450 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16451 match(Set cr (CmpN src zero));
16452
16453 format %{ "testl $src, $src\t# compressed ptr" %}
16454 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16455 ins_pipe(ialu_cr_reg_imm);
16456 %}
16457
16458 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16459 %{
16460 predicate(CompressedOops::base() != nullptr &&
16461 n->in(1)->as_Load()->barrier_data() == 0);
16462 match(Set cr (CmpN (LoadN mem) zero));
16463
16464 ins_cost(500); // XXX
16465 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
16466 ins_encode %{
16467 __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16468 %}
16469 ins_pipe(ialu_cr_reg_mem);
16470 %}
16471
16472 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16473 %{
16474 predicate(CompressedOops::base() == nullptr &&
16475 n->in(1)->as_Load()->barrier_data() == 0);
16476 match(Set cr (CmpN (LoadN mem) zero));
16477
16478 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16479 ins_encode %{
16480 __ cmpl(r12, $mem$$Address);
16481 %}
16482 ins_pipe(ialu_cr_reg_mem);
16483 %}
16484
16485 // Yanked all unsigned pointer compare operations.
16486 // Pointer compares are done with CmpP which is already unsigned.
16487
16488 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16489 %{
16490 match(Set cr (CmpL op1 op2));
16491
16492 format %{ "cmpq $op1, $op2" %}
16493 ins_encode %{
16494 __ cmpq($op1$$Register, $op2$$Register);
16495 %}
16496 ins_pipe(ialu_cr_reg_reg);
16497 %}
16498
16499 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16500 %{
16501 match(Set cr (CmpL op1 op2));
16502
16503 format %{ "cmpq $op1, $op2" %}
16504 ins_encode %{
16505 __ cmpq($op1$$Register, $op2$$constant);
16506 %}
16507 ins_pipe(ialu_cr_reg_imm);
16508 %}
16509
16510 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16511 %{
16512 match(Set cr (CmpL op1 (LoadL op2)));
16513
16514 format %{ "cmpq $op1, $op2" %}
16515 ins_encode %{
16516 __ cmpq($op1$$Register, $op2$$Address);
16517 %}
16518 ins_pipe(ialu_cr_reg_mem);
16519 %}
16520
16521 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16522 %{
16523 match(Set cr (CmpL src zero));
16524
16525 format %{ "testq $src, $src" %}
16526 ins_encode %{
16527 __ testq($src$$Register, $src$$Register);
16528 %}
16529 ins_pipe(ialu_cr_reg_imm);
16530 %}
16531
16532 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16533 %{
16534 match(Set cr (CmpL (AndL src con) zero));
16535
16536 format %{ "testq $src, $con\t# long" %}
16537 ins_encode %{
16538 __ testq($src$$Register, $con$$constant);
16539 %}
16540 ins_pipe(ialu_cr_reg_imm);
16541 %}
16542
16543 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16544 %{
16545 match(Set cr (CmpL (AndL src1 src2) zero));
16546
16547 format %{ "testq $src1, $src2\t# long" %}
16548 ins_encode %{
16549 __ testq($src1$$Register, $src2$$Register);
16550 %}
16551 ins_pipe(ialu_cr_reg_imm);
16552 %}
16553
16554 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16555 %{
16556 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16557
16558 format %{ "testq $src, $mem" %}
16559 ins_encode %{
16560 __ testq($src$$Register, $mem$$Address);
16561 %}
16562 ins_pipe(ialu_cr_reg_mem);
16563 %}
16564
16565 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16566 %{
16567 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16568
16569 format %{ "testq $src, $mem" %}
16570 ins_encode %{
16571 __ testq($src$$Register, $mem$$Address);
16572 %}
16573 ins_pipe(ialu_cr_reg_mem);
16574 %}
16575
16576 // Manifest a CmpU result in an integer register. Very painful.
16577 // This is the test to avoid.
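// A sketch of the emitted sequence (same shape for CmpU3/CmpL3/CmpUL3 below):
//
//   cmp    src1, src2
//   mov    dst, -1        // assume the "below"/"less" outcome
//   jb/jl  done           // src1 < src2  -> dst stays -1
//   setne  dst            // src1 > src2  -> 1;  equal -> 0
//  done: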
16578 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16579 %{
16580 match(Set dst (CmpU3 src1 src2));
16581 effect(KILL flags);
16582
16583 ins_cost(275); // XXX
16584 format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
16585 "movl $dst, -1\n\t"
16586 "jb,u done\n\t"
16587 "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
16588 "done:" %}
16589 ins_encode %{
16590 Label done;
16591 __ cmpl($src1$$Register, $src2$$Register);
16592 __ movl($dst$$Register, -1);
16593 __ jccb(Assembler::below, done);
16594 __ setcc(Assembler::notZero, $dst$$Register);
16595 __ bind(done);
16596 %}
16597 ins_pipe(pipe_slow);
16598 %}
16599
16600 // Manifest a CmpL result in an integer register. Very painful.
16601 // This is the test to avoid.
16602 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16603 %{
16604 match(Set dst (CmpL3 src1 src2));
16605 effect(KILL flags);
16606
16607 ins_cost(275); // XXX
16608 format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
16609 "movl $dst, -1\n\t"
16610 "jl,s done\n\t"
16611 "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
16612 "done:" %}
16613 ins_encode %{
16614 Label done;
16615 __ cmpq($src1$$Register, $src2$$Register);
16616 __ movl($dst$$Register, -1);
16617 __ jccb(Assembler::less, done);
16618 __ setcc(Assembler::notZero, $dst$$Register);
16619 __ bind(done);
16620 %}
16621 ins_pipe(pipe_slow);
16622 %}
16623
16624 // Manifest a CmpUL result in an integer register. Very painful.
16625 // This is the test to avoid.
16626 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16627 %{
16628 match(Set dst (CmpUL3 src1 src2));
16629 effect(KILL flags);
16630
16631 ins_cost(275); // XXX
16632 format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
16633 "movl $dst, -1\n\t"
16634 "jb,u done\n\t"
16635 "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
16636 "done:" %}
16637 ins_encode %{
16638 Label done;
16639 __ cmpq($src1$$Register, $src2$$Register);
16640 __ movl($dst$$Register, -1);
16641 __ jccb(Assembler::below, done);
16642 __ setcc(Assembler::notZero, $dst$$Register);
16643 __ bind(done);
16644 %}
16645 ins_pipe(pipe_slow);
16646 %}
16647
16648 // Unsigned long compare Instructions; really, same as signed long except they
16649 // produce an rFlagsRegU instead of rFlagsReg.
16650 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16651 %{
16652 match(Set cr (CmpUL op1 op2));
16653
16654 format %{ "cmpq $op1, $op2\t# unsigned" %}
16655 ins_encode %{
16656 __ cmpq($op1$$Register, $op2$$Register);
16657 %}
16658 ins_pipe(ialu_cr_reg_reg);
16659 %}
16660
16661 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16662 %{
16663 match(Set cr (CmpUL op1 op2));
16664
16665 format %{ "cmpq $op1, $op2\t# unsigned" %}
16666 ins_encode %{
16667 __ cmpq($op1$$Register, $op2$$constant);
16668 %}
16669 ins_pipe(ialu_cr_reg_imm);
16670 %}
16671
16672 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16673 %{
16674 match(Set cr (CmpUL op1 (LoadL op2)));
16675
16676 format %{ "cmpq $op1, $op2\t# unsigned" %}
16677 ins_encode %{
16678 __ cmpq($op1$$Register, $op2$$Address);
16679 %}
16680 ins_pipe(ialu_cr_reg_mem);
16681 %}
16682
16683 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16684 %{
16685 match(Set cr (CmpUL src zero));
16686
16687 format %{ "testq $src, $src\t# unsigned" %}
16688 ins_encode %{
16689 __ testq($src$$Register, $src$$Register);
16690 %}
16691 ins_pipe(ialu_cr_reg_imm);
16692 %}
16693
16694 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16695 %{
16696 match(Set cr (CmpI (LoadB mem) imm));
16697
16698 ins_cost(125);
16699 format %{ "cmpb $mem, $imm" %}
16700 ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16701 ins_pipe(ialu_cr_reg_mem);
16702 %}
16703
16704 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16705 %{
16706 match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16707
16708 ins_cost(125);
16709 format %{ "testb $mem, $imm\t# ubyte" %}
16710 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16711 ins_pipe(ialu_cr_reg_mem);
16712 %}
16713
16714 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16715 %{
16716 match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16717
16718 ins_cost(125);
16719 format %{ "testb $mem, $imm\t# byte" %}
16720 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16721 ins_pipe(ialu_cr_reg_mem);
16722 %}
16723
16724 //----------Max and Min--------------------------------------------------------
16725 // Min Instructions
16726
16727 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16728 %{
16729 predicate(!UseAPX);
16730 effect(USE_DEF dst, USE src, USE cr);
16731
16732 format %{ "cmovlgt $dst, $src\t# min" %}
16733 ins_encode %{
16734 __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16735 %}
16736 ins_pipe(pipe_cmov_reg);
16737 %}
16738
16739 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16740 %{
16741 predicate(UseAPX);
16742 effect(DEF dst, USE src1, USE src2, USE cr);
16743
16744 format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16745 ins_encode %{
16746 __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16747 %}
16748 ins_pipe(pipe_cmov_reg);
16749 %}
16750
16751 instruct minI_rReg(rRegI dst, rRegI src)
16752 %{
16753 predicate(!UseAPX);
16754 match(Set dst (MinI dst src));
16755
16756 ins_cost(200);
16757 expand %{
16758 rFlagsReg cr;
16759 compI_rReg(cr, dst, src);
16760 cmovI_reg_g(dst, src, cr);
16761 %}
16762 %}
16763
16764 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16765 %{
16766 predicate(UseAPX);
16767 match(Set dst (MinI src1 src2));
16768 effect(DEF dst, USE src1, USE src2);
16769 flag(PD::Flag_ndd_demotable_opr1);
16770
16771 ins_cost(200);
16772 expand %{
16773 rFlagsReg cr;
16774 compI_rReg(cr, src1, src2);
16775 cmovI_reg_g_ndd(dst, src1, src2, cr);
16776 %}
16777 %}
16778
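// Max Instructions
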
16779 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16780 %{
16781 predicate(!UseAPX);
16782 effect(USE_DEF dst, USE src, USE cr);
16783
16784 format %{ "cmovllt $dst, $src\t# max" %}
16785 ins_encode %{
16786 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16787 %}
16788 ins_pipe(pipe_cmov_reg);
16789 %}
16790
16791 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16792 %{
16793 predicate(UseAPX);
16794 effect(DEF dst, USE src1, USE src2, USE cr);
16795
16796 format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16797 ins_encode %{
16798 __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16799 %}
16800 ins_pipe(pipe_cmov_reg);
16801 %}
16802
16803 instruct maxI_rReg(rRegI dst, rRegI src)
16804 %{
16805 predicate(!UseAPX);
16806 match(Set dst (MaxI dst src));
16807
16808 ins_cost(200);
16809 expand %{
16810 rFlagsReg cr;
16811 compI_rReg(cr, dst, src);
16812 cmovI_reg_l(dst, src, cr);
16813 %}
16814 %}
16815
16816 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16817 %{
16818 predicate(UseAPX);
16819 match(Set dst (MaxI src1 src2));
16820 effect(DEF dst, USE src1, USE src2);
16821 flag(PD::Flag_ndd_demotable_opr1);
16822
16823 ins_cost(200);
16824 expand %{
16825 rFlagsReg cr;
16826 compI_rReg(cr, src1, src2);
16827 cmovI_reg_l_ndd(dst, src1, src2, cr);
16828 %}
16829 %}
16830
16831 // ============================================================================
16832 // Branch Instructions
16833
16834 // Jump Direct - Label defines a relative address from JMP+1
16835 instruct jmpDir(label labl)
16836 %{
16837 match(Goto);
16838 effect(USE labl);
16839
16840 ins_cost(300);
16841 format %{ "jmp $labl" %}
16842 size(5);
16843 ins_encode %{
16844 Label* L = $labl$$label;
16845 __ jmp(*L, false); // Always long jump
16846 %}
16847 ins_pipe(pipe_jmp);
16848 %}
16849
16850 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16851 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16852 %{
16853 match(If cop cr);
16854 effect(USE labl);
16855
16856 ins_cost(300);
16857 format %{ "j$cop $labl" %}
16858 size(6);
16859 ins_encode %{
16860 Label* L = $labl$$label;
16861 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16862 %}
16863 ins_pipe(pipe_jcc);
16864 %}
16865
16866 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16867 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16868 %{
16869 match(CountedLoopEnd cop cr);
16870 effect(USE labl);
16871
16872 ins_cost(300);
16873 format %{ "j$cop $labl\t# loop end" %}
16874 size(6);
16875 ins_encode %{
16876 Label* L = $labl$$label;
16877 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16878 %}
16879 ins_pipe(pipe_jcc);
16880 %}
16881
16882 // Jump Direct Conditional - using unsigned comparison
16883 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16884 match(If cop cmp);
16885 effect(USE labl);
16886
16887 ins_cost(300);
16888 format %{ "j$cop,u $labl" %}
16889 size(6);
16890 ins_encode %{
16891 Label* L = $labl$$label;
16892 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16893 %}
16894 ins_pipe(pipe_jcc);
16895 %}
16896
16897 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16898 match(If cop cmp);
16899 effect(USE labl);
16900
16901 ins_cost(200);
16902 format %{ "j$cop,u $labl" %}
16903 size(6);
16904 ins_encode %{
16905 Label* L = $labl$$label;
16906 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16907 %}
16908 ins_pipe(pipe_jcc);
16909 %}
16910
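// Jump Direct Conditional - under unordered-compare flags, eq/ne must fold in
// the parity flag: after a floating-point compare PF=1 signals an unordered
// (NaN) result, so "ne" also branches when PF is set while "eq" must skip the
// branch (hence the local done label in the else arm below).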
16911 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16912 match(If cop cmp);
16913 effect(USE labl);
16914
16915 ins_cost(200);
16916 format %{ $$template
16917 if ($cop$$cmpcode == Assembler::notEqual) {
16918 $$emit$$"jp,u $labl\n\t"
16919 $$emit$$"j$cop,u $labl"
16920 } else {
16921 $$emit$$"jp,u done\n\t"
16922 $$emit$$"j$cop,u $labl\n\t"
16923 $$emit$$"done:"
16924 }
16925 %}
16926 ins_encode %{
16927 Label* l = $labl$$label;
16928 if ($cop$$cmpcode == Assembler::notEqual) {
16929 __ jcc(Assembler::parity, *l, false);
16930 __ jcc(Assembler::notEqual, *l, false);
16931 } else if ($cop$$cmpcode == Assembler::equal) {
16932 Label done;
16933 __ jccb(Assembler::parity, done);
16934 __ jcc(Assembler::equal, *l, false);
16935 __ bind(done);
16936 } else {
16937 ShouldNotReachHere();
16938 }
16939 %}
16940 ins_pipe(pipe_jcc);
16941 %}
16942
16943 // Jump Direct Conditional - using signed and unsigned comparison
16944 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
16945 match(If cop cmp);
16946 effect(USE labl);
16947
16948 ins_cost(200);
16949 format %{ "j$cop,su $labl" %}
16950 size(6);
16951 ins_encode %{
16952 Label* L = $labl$$label;
16953 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16954 %}
16955 ins_pipe(pipe_jcc);
16956 %}
16957
16958 // ============================================================================
16959 // The second, slow half of a subtype check. Scan the subklass's secondary
16960 // supers array for an instance of the superklass. Set a hidden internal
16961 // cache on a hit (the cache is checked with exposed code in
16962 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
16963 // encoding ALSO sets flags.
16964
16965 instruct partialSubtypeCheck(rdi_RegP result,
16966 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16967 rFlagsReg cr)
16968 %{
16969 match(Set result (PartialSubtypeCheck sub super));
16970 predicate(!UseSecondarySupersTable);
16971 effect(KILL rcx, KILL cr);
16972
16973 ins_cost(1100); // slightly larger than the next version
16974 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16975 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16976 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16977 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16978 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
16979 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16980 "xorq $result, $result\t\t# Hit: rdi zero\n\t"
16981 "miss:\t" %}
16982
16983 ins_encode %{
16984 Label miss;
16985 // NB: Callers may assume that, when $result is a valid register,
16986 // check_klass_subtype_slow_path_linear sets it to a nonzero
16987 // value.
16988 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16989 $rcx$$Register, $result$$Register,
16990 nullptr, &miss,
16991 /*set_cond_codes:*/ true);
16992 __ xorptr($result$$Register, $result$$Register);
16993 __ bind(miss);
16994 %}
16995
16996 ins_pipe(pipe_slow);
16997 %}
16998
16999 // ============================================================================
17000 // Two versions of hashtable-based partialSubtypeCheck, both used when
17001 // we need to search for a super class in the secondary supers array.
17002 // The first is used when we don't know _a priori_ the class being
17003 // searched for. The second, far more common, is used when we do know:
17004 // this is used for instanceof, checkcast, and any case where C2 can
17005 // determine it by constant propagation.
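// For illustration: a check such as "x instanceof Foo", where Foo is a
// constant loaded class, matches the Const form (the far more common case);
// only checks whose super class is not known until runtime fall back to the
// Var form.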
17006
17007 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17008 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17009 rFlagsReg cr)
17010 %{
17011 match(Set result (PartialSubtypeCheck sub super));
17012 predicate(UseSecondarySupersTable);
17013 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17014
17015 ins_cost(1000);
17016 format %{ "partialSubtypeCheck $result, $sub, $super" %}
17017
17018 ins_encode %{
17019 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
17020 $temp3$$Register, $temp4$$Register, $result$$Register);
17021 %}
17022
17023 ins_pipe(pipe_slow);
17024 %}
17025
17026 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17027 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17028 rFlagsReg cr)
17029 %{
17030 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17031 predicate(UseSecondarySupersTable);
17032 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17033
17034 ins_cost(700); // smaller than the next version
17035 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17036
17037 ins_encode %{
17038 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17039 if (InlineSecondarySupersTest) {
17040 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17041 $temp3$$Register, $temp4$$Register, $result$$Register,
17042 super_klass_slot);
17043 } else {
17044 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17045 }
17046 %}
17047
17048 ins_pipe(pipe_slow);
17049 %}
17050
17051 // ============================================================================
17052 // Branch Instructions -- short offset versions
17053 //
17054 // These instructions are used to replace jumps of a long offset (the default
17055 // match) with jumps of a shorter offset. These instructions are all tagged
17056 // with the ins_short_branch attribute, which causes the ADLC to suppress these
17057 // match rules during general matching. Instead, the ADLC generates a conversion
17058 // method in the MachNode which can be used to do in-place replacement of the
17059 // long variant with the shorter variant. The compiler decides whether the short
17060 // form can be used via the is_short_branch_offset() predicate in the
17061 // machine-specific code section of the file.
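// For example, jmpDir above is the 5-byte long form (opcode + rel32) while
// jmpDir_short below is the 2-byte form (opcode + rel8); the conditional
// jumps likewise shrink from 6 bytes to 2, matching their size() attributes.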
17062
17063 // Jump Direct - Label defines a relative address from JMP+1
17064 instruct jmpDir_short(label labl) %{
17065 match(Goto);
17066 effect(USE labl);
17067
17068 ins_cost(300);
17069 format %{ "jmp,s $labl" %}
17070 size(2);
17071 ins_encode %{
17072 Label* L = $labl$$label;
17073 __ jmpb(*L);
17074 %}
17075 ins_pipe(pipe_jmp);
17076 ins_short_branch(1);
17077 %}
17078
17079 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17080 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17081 match(If cop cr);
17082 effect(USE labl);
17083
17084 ins_cost(300);
17085 format %{ "j$cop,s $labl" %}
17086 size(2);
17087 ins_encode %{
17088 Label* L = $labl$$label;
17089 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17090 %}
17091 ins_pipe(pipe_jcc);
17092 ins_short_branch(1);
17093 %}
17094
17095 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17096 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17097 match(CountedLoopEnd cop cr);
17098 effect(USE labl);
17099
17100 ins_cost(300);
17101 format %{ "j$cop,s $labl\t# loop end" %}
17102 size(2);
17103 ins_encode %{
17104 Label* L = $labl$$label;
17105 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17106 %}
17107 ins_pipe(pipe_jcc);
17108 ins_short_branch(1);
17109 %}
17110
17111 // Jump Direct Conditional - using unsigned comparison
17112 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17113 match(If cop cmp);
17114 effect(USE labl);
17115
17116 ins_cost(300);
17117 format %{ "j$cop,us $labl" %}
17118 size(2);
17119 ins_encode %{
17120 Label* L = $labl$$label;
17121 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17122 %}
17123 ins_pipe(pipe_jcc);
17124 ins_short_branch(1);
17125 %}
17126
17127 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17128 match(If cop cmp);
17129 effect(USE labl);
17130
17131 ins_cost(300);
17132 format %{ "j$cop,us $labl" %}
17133 size(2);
17134 ins_encode %{
17135 Label* L = $labl$$label;
17136 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17137 %}
17138 ins_pipe(pipe_jcc);
17139 ins_short_branch(1);
17140 %}
17141
17142 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17143 match(If cop cmp);
17144 effect(USE labl);
17145
17146 ins_cost(300);
17147 format %{ $$template
17148 if ($cop$$cmpcode == Assembler::notEqual) {
17149 $$emit$$"jp,u,s $labl\n\t"
17150 $$emit$$"j$cop,u,s $labl"
17151 } else {
17152 $$emit$$"jp,u,s done\n\t"
17153 $$emit$$"j$cop,u,s $labl\n\t"
17154 $$emit$$"done:"
17155 }
17156 %}
17157 size(4);
17158 ins_encode %{
17159 Label* l = $labl$$label;
17160 if ($cop$$cmpcode == Assembler::notEqual) {
17161 __ jccb(Assembler::parity, *l);
17162 __ jccb(Assembler::notEqual, *l);
17163 } else if ($cop$$cmpcode == Assembler::equal) {
17164 Label done;
17165 __ jccb(Assembler::parity, done);
17166 __ jccb(Assembler::equal, *l);
17167 __ bind(done);
17168 } else {
17169 ShouldNotReachHere();
17170 }
17171 %}
17172 ins_pipe(pipe_jcc);
17173 ins_short_branch(1);
17174 %}
17175
17176 // Jump Direct Conditional - using signed and unsigned comparison
17177 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17178 match(If cop cmp);
17179 effect(USE labl);
17180
17181 ins_cost(300);
17182 format %{ "j$cop,sus $labl" %}
17183 size(2);
17184 ins_encode %{
17185 Label* L = $labl$$label;
17186 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17187 %}
17188 ins_pipe(pipe_jcc);
17189 ins_short_branch(1);
17190 %}
17191
17192 // ============================================================================
17193 // inlined locking and unlocking
17194
17195 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17196 match(Set cr (FastLock object box));
17197 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17198 ins_cost(300);
17199 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17200 ins_encode %{
17201 __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17202 %}
17203 ins_pipe(pipe_slow);
17204 %}
17205
17206 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17207 match(Set cr (FastUnlock object rax_reg));
17208 effect(TEMP tmp, USE_KILL rax_reg);
17209 ins_cost(300);
17210 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17211 ins_encode %{
17212 __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17213 %}
17214 ins_pipe(pipe_slow);
17215 %}
17216
17217
17218 // ============================================================================
17219 // Safepoint Instructions
17220 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17221 %{
17222 match(SafePoint poll);
17223 effect(KILL cr, USE poll);
17224
17225 format %{ "testl rax, [$poll]\t"
17226 "# Safepoint: poll for GC" %}
17227 ins_cost(125);
17228 ins_encode %{
17229 __ relocate(relocInfo::poll_type);
17230 address pre_pc = __ pc();
17231 __ testl(rax, Address($poll$$Register, 0));
17232 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17233 %}
17234 ins_pipe(ialu_reg_mem);
17235 %}
17236
17237 instruct mask_all_evexL(kReg dst, rRegL src) %{
17238 match(Set dst (MaskAll src));
17239 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17240 ins_encode %{
17241 int mask_len = Matcher::vector_length(this);
17242 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17243 %}
17244 ins_pipe( pipe_slow );
17245 %}
17246
17247 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17248 predicate(Matcher::vector_length(n) > 32);
17249 match(Set dst (MaskAll src));
17250 effect(TEMP tmp);
17251 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17252 ins_encode %{
17253 int mask_len = Matcher::vector_length(this);
17254 __ movslq($tmp$$Register, $src$$Register);
17255 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17256 %}
17257 ins_pipe( pipe_slow );
17258 %}
17259
17260 // ============================================================================
17261 // Procedure Call/Return Instructions
17262 // Call Java Static Instruction
17263 // Note: If this code changes, the corresponding ret_addr_offset() and
17264 // compute_padding() functions will have to be adjusted.
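// (Roughly: ret_addr_offset() reports where the return address falls relative
// to the start of this sequence, and compute_padding() aligns the call so its
// displacement does not span a cache line and can be patched atomically. The
// same note applies to the dynamic call below.)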
17265 instruct CallStaticJavaDirect(method meth) %{
17266 match(CallStaticJava);
17267 effect(USE meth);
17268
17269 ins_cost(300);
17270 format %{ "call,static " %}
17271 opcode(0xE8); /* E8 cd */
17272 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17273 ins_pipe(pipe_slow);
17274 ins_alignment(4);
17275 %}
17276
17277 // Call Java Dynamic Instruction
17278 // Note: If this code changes, the corresponding ret_addr_offset() and
17279 // compute_padding() functions will have to be adjusted.
17280 instruct CallDynamicJavaDirect(method meth)
17281 %{
17282 match(CallDynamicJava);
17283 effect(USE meth);
17284
17285 ins_cost(300);
17286 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17287 "call,dynamic " %}
17288 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17289 ins_pipe(pipe_slow);
17290 ins_alignment(4);
17291 %}
17292
17293 // Call Runtime Instruction
17294 instruct CallRuntimeDirect(method meth)
17295 %{
17296 match(CallRuntime);
17297 effect(USE meth);
17298
17299 ins_cost(300);
17300 format %{ "call,runtime " %}
17301 ins_encode(clear_avx, Java_To_Runtime(meth));
17302 ins_pipe(pipe_slow);
17303 %}
17304
17305 // Call runtime without safepoint
17306 instruct CallLeafDirect(method meth)
17307 %{
17308 match(CallLeaf);
17309 effect(USE meth);
17310
17311 ins_cost(300);
17312 format %{ "call_leaf,runtime " %}
17313 ins_encode(clear_avx, Java_To_Runtime(meth));
17314 ins_pipe(pipe_slow);
17315 %}
17316
17317 // Call runtime without safepoint and with vector arguments
17318 instruct CallLeafDirectVector(method meth)
17319 %{
17320 match(CallLeafVector);
17321 effect(USE meth);
17322
17323 ins_cost(300);
17324 format %{ "call_leaf,vector " %}
17325 ins_encode(Java_To_Runtime(meth));
17326 ins_pipe(pipe_slow);
17327 %}
17328
17329 // Call runtime without safepoint
17330 instruct CallLeafNoFPDirect(method meth)
17331 %{
17332 match(CallLeafNoFP);
17333 effect(USE meth);
17334
17335 ins_cost(300);
17336 format %{ "call_leaf_nofp,runtime " %}
17337 ins_encode(clear_avx, Java_To_Runtime(meth));
17338 ins_pipe(pipe_slow);
17339 %}
17340
17341 // Return Instruction
17342 // Remove the return address & jump to it.
17343 // Notice: We always emit a nop after a ret to make sure there is room
17344 // for safepoint patching
17345 instruct Ret()
17346 %{
17347 match(Return);
17348
17349 format %{ "ret" %}
17350 ins_encode %{
17351 __ ret(0);
17352 %}
17353 ins_pipe(pipe_jmp);
17354 %}
17355
17356 // Tail Call; Jump from runtime stub to Java code.
17357 // Also known as an 'interprocedural jump'.
17358 // Target of jump will eventually return to caller.
17359 // TailJump below removes the return address.
17360 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17361 // emitted just above the TailCall, and it has reset rbp to the caller's state.
17362 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17363 %{
17364 match(TailCall jump_target method_ptr);
17365
17366 ins_cost(300);
17367 format %{ "jmp $jump_target\t# rbx holds method" %}
17368 ins_encode %{
17369 __ jmp($jump_target$$Register);
17370 %}
17371 ins_pipe(pipe_jmp);
17372 %}
17373
17374 // Tail Jump; remove the return address; jump to target.
17375 // TailCall above leaves the return address around.
17376 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17377 %{
17378 match(TailJump jump_target ex_oop);
17379
17380 ins_cost(300);
17381 format %{ "popq rdx\t# pop return address\n\t"
17382 "jmp $jump_target" %}
17383 ins_encode %{
17384 __ popq(as_Register(RDX_enc));
17385 __ jmp($jump_target$$Register);
17386 %}
17387 ins_pipe(pipe_jmp);
17388 %}
17389
17390 // Forward exception.
17391 instruct ForwardExceptionjmp()
17392 %{
17393 match(ForwardException);
17394
17395 format %{ "jmp forward_exception_stub" %}
17396 ins_encode %{
17397 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17398 %}
17399 ins_pipe(pipe_jmp);
17400 %}
17401
17402 // Create exception oop: created by stack-crawling runtime code.
17403 // The created exception is now available to this handler, and is set up
17404 // just prior to jumping to this handler. No code is emitted.
17405 instruct CreateException(rax_RegP ex_oop)
17406 %{
17407 match(Set ex_oop (CreateEx));
17408
17409 size(0);
17410 // use the following format syntax
17411 format %{ "# exception oop is in rax; no code emitted" %}
17412 ins_encode();
17413 ins_pipe(empty);
17414 %}
17415
17416 // Rethrow exception:
17417 // The exception oop will come in the first argument position.
17418 // Then JUMP (not call) to the rethrow stub code.
17419 instruct RethrowException()
17420 %{
17421 match(Rethrow);
17422
17423 // use the following format syntax
17424 format %{ "jmp rethrow_stub" %}
17425 ins_encode %{
17426 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17427 %}
17428 ins_pipe(pipe_jmp);
17429 %}
17430
17431 // ============================================================================
17432 // This name is KNOWN by the ADLC and cannot be changed.
17433 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17434 // for this node.
17435 instruct tlsLoadP(r15_RegP dst) %{
17436 match(Set dst (ThreadLocal));
17437 effect(DEF dst);
17438
17439 size(0);
17440 format %{ "# TLS is in R15" %}
17441 ins_encode( /*empty encoding*/ );
17442 ins_pipe(ialu_reg_reg);
17443 %}
17444
17445 instruct addF_reg(regF dst, regF src) %{
17446 predicate(UseAVX == 0);
17447 match(Set dst (AddF dst src));
17448
17449 format %{ "addss $dst, $src" %}
17450 ins_cost(150);
17451 ins_encode %{
17452 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17453 %}
17454 ins_pipe(pipe_slow);
17455 %}
17456
17457 instruct addF_mem(regF dst, memory src) %{
17458 predicate(UseAVX == 0);
17459 match(Set dst (AddF dst (LoadF src)));
17460
17461 format %{ "addss $dst, $src" %}
17462 ins_cost(150);
17463 ins_encode %{
17464 __ addss($dst$$XMMRegister, $src$$Address);
17465 %}
17466 ins_pipe(pipe_slow);
17467 %}
17468
17469 instruct addF_imm(regF dst, immF con) %{
17470 predicate(UseAVX == 0);
17471 match(Set dst (AddF dst con));
17472 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17473 ins_cost(150);
17474 ins_encode %{
17475 __ addss($dst$$XMMRegister, $constantaddress($con));
17476 %}
17477 ins_pipe(pipe_slow);
17478 %}
17479
17480 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17481 predicate(UseAVX > 0);
17482 match(Set dst (AddF src1 src2));
17483
17484 format %{ "vaddss $dst, $src1, $src2" %}
17485 ins_cost(150);
17486 ins_encode %{
17487 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17488 %}
17489 ins_pipe(pipe_slow);
17490 %}
17491
17492 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17493 predicate(UseAVX > 0);
17494 match(Set dst (AddF src1 (LoadF src2)));
17495
17496 format %{ "vaddss $dst, $src1, $src2" %}
17497 ins_cost(150);
17498 ins_encode %{
17499 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17500 %}
17501 ins_pipe(pipe_slow);
17502 %}
17503
17504 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17505 predicate(UseAVX > 0);
17506 match(Set dst (AddF src con));
17507
17508 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17509 ins_cost(150);
17510 ins_encode %{
17511 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17512 %}
17513 ins_pipe(pipe_slow);
17514 %}
17515
17516 instruct addD_reg(regD dst, regD src) %{
17517 predicate(UseAVX == 0);
17518 match(Set dst (AddD dst src));
17519
17520 format %{ "addsd $dst, $src" %}
17521 ins_cost(150);
17522 ins_encode %{
17523 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17524 %}
17525 ins_pipe(pipe_slow);
17526 %}
17527
17528 instruct addD_mem(regD dst, memory src) %{
17529 predicate(UseAVX == 0);
17530 match(Set dst (AddD dst (LoadD src)));
17531
17532 format %{ "addsd $dst, $src" %}
17533 ins_cost(150);
17534 ins_encode %{
17535 __ addsd($dst$$XMMRegister, $src$$Address);
17536 %}
17537 ins_pipe(pipe_slow);
17538 %}
17539
17540 instruct addD_imm(regD dst, immD con) %{
17541 predicate(UseAVX == 0);
17542 match(Set dst (AddD dst con));
17543 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17544 ins_cost(150);
17545 ins_encode %{
17546 __ addsd($dst$$XMMRegister, $constantaddress($con));
17547 %}
17548 ins_pipe(pipe_slow);
17549 %}
17550
17551 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17552 predicate(UseAVX > 0);
17553 match(Set dst (AddD src1 src2));
17554
17555 format %{ "vaddsd $dst, $src1, $src2" %}
17556 ins_cost(150);
17557 ins_encode %{
17558 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17559 %}
17560 ins_pipe(pipe_slow);
17561 %}
17562
17563 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17564 predicate(UseAVX > 0);
17565 match(Set dst (AddD src1 (LoadD src2)));
17566
17567 format %{ "vaddsd $dst, $src1, $src2" %}
17568 ins_cost(150);
17569 ins_encode %{
17570 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17571 %}
17572 ins_pipe(pipe_slow);
17573 %}
17574
17575 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17576 predicate(UseAVX > 0);
17577 match(Set dst (AddD src con));
17578
17579 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17580 ins_cost(150);
17581 ins_encode %{
17582 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17583 %}
17584 ins_pipe(pipe_slow);
17585 %}
17586
17587 instruct subF_reg(regF dst, regF src) %{
17588 predicate(UseAVX == 0);
17589 match(Set dst (SubF dst src));
17590
17591 format %{ "subss $dst, $src" %}
17592 ins_cost(150);
17593 ins_encode %{
17594 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17595 %}
17596 ins_pipe(pipe_slow);
17597 %}
17598
17599 instruct subF_mem(regF dst, memory src) %{
17600 predicate(UseAVX == 0);
17601 match(Set dst (SubF dst (LoadF src)));
17602
17603 format %{ "subss $dst, $src" %}
17604 ins_cost(150);
17605 ins_encode %{
17606 __ subss($dst$$XMMRegister, $src$$Address);
17607 %}
17608 ins_pipe(pipe_slow);
17609 %}
17610
17611 instruct subF_imm(regF dst, immF con) %{
17612 predicate(UseAVX == 0);
17613 match(Set dst (SubF dst con));
17614 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17615 ins_cost(150);
17616 ins_encode %{
17617 __ subss($dst$$XMMRegister, $constantaddress($con));
17618 %}
17619 ins_pipe(pipe_slow);
17620 %}
17621
17622 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17623 predicate(UseAVX > 0);
17624 match(Set dst (SubF src1 src2));
17625
17626 format %{ "vsubss $dst, $src1, $src2" %}
17627 ins_cost(150);
17628 ins_encode %{
17629 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17630 %}
17631 ins_pipe(pipe_slow);
17632 %}
17633
17634 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17635 predicate(UseAVX > 0);
17636 match(Set dst (SubF src1 (LoadF src2)));
17637
17638 format %{ "vsubss $dst, $src1, $src2" %}
17639 ins_cost(150);
17640 ins_encode %{
17641 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17642 %}
17643 ins_pipe(pipe_slow);
17644 %}
17645
17646 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17647 predicate(UseAVX > 0);
17648 match(Set dst (SubF src con));
17649
17650 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17651 ins_cost(150);
17652 ins_encode %{
17653 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17654 %}
17655 ins_pipe(pipe_slow);
17656 %}
17657
17658 instruct subD_reg(regD dst, regD src) %{
17659 predicate(UseAVX == 0);
17660 match(Set dst (SubD dst src));
17661
17662 format %{ "subsd $dst, $src" %}
17663 ins_cost(150);
17664 ins_encode %{
17665 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17666 %}
17667 ins_pipe(pipe_slow);
17668 %}
17669
17670 instruct subD_mem(regD dst, memory src) %{
17671 predicate(UseAVX == 0);
17672 match(Set dst (SubD dst (LoadD src)));
17673
17674 format %{ "subsd $dst, $src" %}
17675 ins_cost(150);
17676 ins_encode %{
17677 __ subsd($dst$$XMMRegister, $src$$Address);
17678 %}
17679 ins_pipe(pipe_slow);
17680 %}
17681
17682 instruct subD_imm(regD dst, immD con) %{
17683 predicate(UseAVX == 0);
17684 match(Set dst (SubD dst con));
17685 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17686 ins_cost(150);
17687 ins_encode %{
17688 __ subsd($dst$$XMMRegister, $constantaddress($con));
17689 %}
17690 ins_pipe(pipe_slow);
17691 %}
17692
17693 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17694 predicate(UseAVX > 0);
17695 match(Set dst (SubD src1 src2));
17696
17697 format %{ "vsubsd $dst, $src1, $src2" %}
17698 ins_cost(150);
17699 ins_encode %{
17700 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17701 %}
17702 ins_pipe(pipe_slow);
17703 %}
17704
17705 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17706 predicate(UseAVX > 0);
17707 match(Set dst (SubD src1 (LoadD src2)));
17708
17709 format %{ "vsubsd $dst, $src1, $src2" %}
17710 ins_cost(150);
17711 ins_encode %{
17712 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17713 %}
17714 ins_pipe(pipe_slow);
17715 %}
17716
17717 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17718 predicate(UseAVX > 0);
17719 match(Set dst (SubD src con));
17720
17721 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17722 ins_cost(150);
17723 ins_encode %{
17724 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17725 %}
17726 ins_pipe(pipe_slow);
17727 %}
17728
17729 instruct mulF_reg(regF dst, regF src) %{
17730 predicate(UseAVX == 0);
17731 match(Set dst (MulF dst src));
17732
17733 format %{ "mulss $dst, $src" %}
17734 ins_cost(150);
17735 ins_encode %{
17736 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17737 %}
17738 ins_pipe(pipe_slow);
17739 %}
17740
17741 instruct mulF_mem(regF dst, memory src) %{
17742 predicate(UseAVX == 0);
17743 match(Set dst (MulF dst (LoadF src)));
17744
17745 format %{ "mulss $dst, $src" %}
17746 ins_cost(150);
17747 ins_encode %{
17748 __ mulss($dst$$XMMRegister, $src$$Address);
17749 %}
17750 ins_pipe(pipe_slow);
17751 %}
17752
17753 instruct mulF_imm(regF dst, immF con) %{
17754 predicate(UseAVX == 0);
17755 match(Set dst (MulF dst con));
17756 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17757 ins_cost(150);
17758 ins_encode %{
17759 __ mulss($dst$$XMMRegister, $constantaddress($con));
17760 %}
17761 ins_pipe(pipe_slow);
17762 %}
17763
17764 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17765 predicate(UseAVX > 0);
17766 match(Set dst (MulF src1 src2));
17767
17768 format %{ "vmulss $dst, $src1, $src2" %}
17769 ins_cost(150);
17770 ins_encode %{
17771 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17772 %}
17773 ins_pipe(pipe_slow);
17774 %}
17775
17776 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17777 predicate(UseAVX > 0);
17778 match(Set dst (MulF src1 (LoadF src2)));
17779
17780 format %{ "vmulss $dst, $src1, $src2" %}
17781 ins_cost(150);
17782 ins_encode %{
17783 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17784 %}
17785 ins_pipe(pipe_slow);
17786 %}
17787
17788 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17789 predicate(UseAVX > 0);
17790 match(Set dst (MulF src con));
17791
17792 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17793 ins_cost(150);
17794 ins_encode %{
17795 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17796 %}
17797 ins_pipe(pipe_slow);
17798 %}
17799
17800 instruct mulD_reg(regD dst, regD src) %{
17801 predicate(UseAVX == 0);
17802 match(Set dst (MulD dst src));
17803
17804 format %{ "mulsd $dst, $src" %}
17805 ins_cost(150);
17806 ins_encode %{
17807 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17808 %}
17809 ins_pipe(pipe_slow);
17810 %}
17811
17812 instruct mulD_mem(regD dst, memory src) %{
17813 predicate(UseAVX == 0);
17814 match(Set dst (MulD dst (LoadD src)));
17815
17816 format %{ "mulsd $dst, $src" %}
17817 ins_cost(150);
17818 ins_encode %{
17819 __ mulsd($dst$$XMMRegister, $src$$Address);
17820 %}
17821 ins_pipe(pipe_slow);
17822 %}
17823
17824 instruct mulD_imm(regD dst, immD con) %{
17825 predicate(UseAVX == 0);
17826 match(Set dst (MulD dst con));
17827 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17828 ins_cost(150);
17829 ins_encode %{
17830 __ mulsd($dst$$XMMRegister, $constantaddress($con));
17831 %}
17832 ins_pipe(pipe_slow);
17833 %}
17834
17835 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17836 predicate(UseAVX > 0);
17837 match(Set dst (MulD src1 src2));
17838
17839 format %{ "vmulsd $dst, $src1, $src2" %}
17840 ins_cost(150);
17841 ins_encode %{
17842 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17843 %}
17844 ins_pipe(pipe_slow);
17845 %}
17846
17847 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17848 predicate(UseAVX > 0);
17849 match(Set dst (MulD src1 (LoadD src2)));
17850
17851 format %{ "vmulsd $dst, $src1, $src2" %}
17852 ins_cost(150);
17853 ins_encode %{
17854 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17855 %}
17856 ins_pipe(pipe_slow);
17857 %}
17858
17859 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17860 predicate(UseAVX > 0);
17861 match(Set dst (MulD src con));
17862
17863 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17864 ins_cost(150);
17865 ins_encode %{
17866 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17867 %}
17868 ins_pipe(pipe_slow);
17869 %}
17870
17871 instruct divF_reg(regF dst, regF src) %{
17872 predicate(UseAVX == 0);
17873 match(Set dst (DivF dst src));
17874
17875 format %{ "divss $dst, $src" %}
17876 ins_cost(150);
17877 ins_encode %{
17878 __ divss($dst$$XMMRegister, $src$$XMMRegister);
17879 %}
17880 ins_pipe(pipe_slow);
17881 %}
17882
17883 instruct divF_mem(regF dst, memory src) %{
17884 predicate(UseAVX == 0);
17885 match(Set dst (DivF dst (LoadF src)));
17886
17887 format %{ "divss $dst, $src" %}
17888 ins_cost(150);
17889 ins_encode %{
17890 __ divss($dst$$XMMRegister, $src$$Address);
17891 %}
17892 ins_pipe(pipe_slow);
17893 %}
17894
17895 instruct divF_imm(regF dst, immF con) %{
17896 predicate(UseAVX == 0);
17897 match(Set dst (DivF dst con));
17898 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17899 ins_cost(150);
17900 ins_encode %{
17901 __ divss($dst$$XMMRegister, $constantaddress($con));
17902 %}
17903 ins_pipe(pipe_slow);
17904 %}
17905
17906 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17907 predicate(UseAVX > 0);
17908 match(Set dst (DivF src1 src2));
17909
17910 format %{ "vdivss $dst, $src1, $src2" %}
17911 ins_cost(150);
17912 ins_encode %{
17913 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17914 %}
17915 ins_pipe(pipe_slow);
17916 %}
17917
17918 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17919 predicate(UseAVX > 0);
17920 match(Set dst (DivF src1 (LoadF src2)));
17921
17922 format %{ "vdivss $dst, $src1, $src2" %}
17923 ins_cost(150);
17924 ins_encode %{
17925 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17926 %}
17927 ins_pipe(pipe_slow);
17928 %}
17929
17930 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17931 predicate(UseAVX > 0);
17932 match(Set dst (DivF src con));
17933
17934 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17935 ins_cost(150);
17936 ins_encode %{
17937 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17938 %}
17939 ins_pipe(pipe_slow);
17940 %}
17941
17942 instruct divD_reg(regD dst, regD src) %{
17943 predicate(UseAVX == 0);
17944 match(Set dst (DivD dst src));
17945
17946 format %{ "divsd $dst, $src" %}
17947 ins_cost(150);
17948 ins_encode %{
17949 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17950 %}
17951 ins_pipe(pipe_slow);
17952 %}
17953
17954 instruct divD_mem(regD dst, memory src) %{
17955 predicate(UseAVX == 0);
17956 match(Set dst (DivD dst (LoadD src)));
17957
17958 format %{ "divsd $dst, $src" %}
17959 ins_cost(150);
17960 ins_encode %{
17961 __ divsd($dst$$XMMRegister, $src$$Address);
17962 %}
17963 ins_pipe(pipe_slow);
17964 %}
17965
17966 instruct divD_imm(regD dst, immD con) %{
17967 predicate(UseAVX == 0);
17968 match(Set dst (DivD dst con));
17969 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17970 ins_cost(150);
17971 ins_encode %{
17972 __ divsd($dst$$XMMRegister, $constantaddress($con));
17973 %}
17974 ins_pipe(pipe_slow);
17975 %}
17976
17977 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17978 predicate(UseAVX > 0);
17979 match(Set dst (DivD src1 src2));
17980
17981 format %{ "vdivsd $dst, $src1, $src2" %}
17982 ins_cost(150);
17983 ins_encode %{
17984 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17985 %}
17986 ins_pipe(pipe_slow);
17987 %}
17988
17989 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17990 predicate(UseAVX > 0);
17991 match(Set dst (DivD src1 (LoadD src2)));
17992
17993 format %{ "vdivsd $dst, $src1, $src2" %}
17994 ins_cost(150);
17995 ins_encode %{
17996 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17997 %}
17998 ins_pipe(pipe_slow);
17999 %}
18000
18001 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18002 predicate(UseAVX > 0);
18003 match(Set dst (DivD src con));
18004
18005 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18006 ins_cost(150);
18007 ins_encode %{
18008 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18009 %}
18010 ins_pipe(pipe_slow);
18011 %}
18012
18013 instruct absF_reg(regF dst) %{
18014 predicate(UseAVX == 0);
18015 match(Set dst (AbsF dst));
18016 ins_cost(150);
18017 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
18018 ins_encode %{
18019 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18020 %}
18021 ins_pipe(pipe_slow);
18022 %}
18023
18024 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18025 predicate(UseAVX > 0);
18026 match(Set dst (AbsF src));
18027 ins_cost(150);
18028 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18029 ins_encode %{
18030 int vlen_enc = Assembler::AVX_128bit;
18031 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18032 ExternalAddress(float_signmask()), vlen_enc);
18033 %}
18034 ins_pipe(pipe_slow);
18035 %}
18036
18037 instruct absD_reg(regD dst) %{
18038 predicate(UseAVX == 0);
18039 match(Set dst (AbsD dst));
18040 ins_cost(150);
18041 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
18042 "# abs double by sign masking" %}
18043 ins_encode %{
18044 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18045 %}
18046 ins_pipe(pipe_slow);
18047 %}
18048
18049 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18050 predicate(UseAVX > 0);
18051 match(Set dst (AbsD src));
18052 ins_cost(150);
18053 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
18054 "# abs double by sign masking" %}
18055 ins_encode %{
18056 int vlen_enc = Assembler::AVX_128bit;
18057 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18058 ExternalAddress(double_signmask()), vlen_enc);
18059 %}
18060 ins_pipe(pipe_slow);
18061 %}
18062
18063 instruct negF_reg(regF dst) %{
18064 predicate(UseAVX == 0);
18065 match(Set dst (NegF dst));
18066 ins_cost(150);
18067 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
18068 ins_encode %{
18069 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18070 %}
18071 ins_pipe(pipe_slow);
18072 %}
18073
18074 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18075 predicate(UseAVX > 0);
18076 match(Set dst (NegF src));
18077 ins_cost(150);
18078 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18079 ins_encode %{
18080 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18081 ExternalAddress(float_signflip()));
18082 %}
18083 ins_pipe(pipe_slow);
18084 %}
18085
18086 instruct negD_reg(regD dst) %{
18087 predicate(UseAVX == 0);
18088 match(Set dst (NegD dst));
18089 ins_cost(150);
18090 format %{ "xorpd $dst, [0x8000000000000000]\t"
18091 "# neg double by sign flipping" %}
18092 ins_encode %{
18093 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18094 %}
18095 ins_pipe(pipe_slow);
18096 %}
18097
18098 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18099 predicate(UseAVX > 0);
18100 match(Set dst (NegD src));
18101 ins_cost(150);
18102 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
18103 "# neg double by sign flipping" %}
18104 ins_encode %{
18105 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18106 ExternalAddress(double_signflip()));
18107 %}
18108 ins_pipe(pipe_slow);
18109 %}
18110
18111 // The sqrtss instruction needs its destination register to be pre-initialized for best performance.
18112 // Therefore, only the instruct rule where the input is pre-loaded into the dst register is defined below.
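// (sqrtss writes only the low 32 bits of dst and leaves the upper bits
// untouched, so a stale dst value would add a false dependency; keeping
// src == dst sidesteps that. The same applies to sqrtsd below.)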
18113 instruct sqrtF_reg(regF dst) %{
18114 match(Set dst (SqrtF dst));
18115 format %{ "sqrtss $dst, $dst" %}
18116 ins_encode %{
18117 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18118 %}
18119 ins_pipe(pipe_slow);
18120 %}
18121
18122 // The sqrtsd instruction needs its destination register to be pre-initialized for best performance.
18123 // Therefore, only the instruct rule where the input is pre-loaded into the dst register is defined below.
18124 instruct sqrtD_reg(regD dst) %{
18125 match(Set dst (SqrtD dst));
18126 format %{ "sqrtsd $dst, $dst" %}
18127 ins_encode %{
18128 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18129 %}
18130 ins_pipe(pipe_slow);
18131 %}
18132
18133 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18134 effect(TEMP tmp);
18135 match(Set dst (ConvF2HF src));
18136 ins_cost(125);
18137 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18138 ins_encode %{
18139 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18140 %}
18141 ins_pipe( pipe_slow );
18142 %}
18143
18144 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18145 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18146 effect(TEMP ktmp, TEMP rtmp);
18147 match(Set mem (StoreC mem (ConvF2HF src)));
18148 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18149 ins_encode %{
18150 __ movl($rtmp$$Register, 0x1);
18151 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18152 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18153 %}
18154 ins_pipe( pipe_slow );
18155 %}
18156
18157 instruct vconvF2HF(vec dst, vec src) %{
18158 match(Set dst (VectorCastF2HF src));
18159 format %{ "vector_conv_F2HF $dst $src" %}
18160 ins_encode %{
18161 int vlen_enc = vector_length_encoding(this, $src);
18162 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18163 %}
18164 ins_pipe( pipe_slow );
18165 %}
18166
18167 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18168 predicate(n->as_StoreVector()->memory_size() >= 16);
18169 match(Set mem (StoreVector mem (VectorCastF2HF src)));
18170 format %{ "vcvtps2ph $mem,$src" %}
18171 ins_encode %{
18172 int vlen_enc = vector_length_encoding(this, $src);
18173 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18174 %}
18175 ins_pipe( pipe_slow );
18176 %}
18177
18178 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18179 match(Set dst (ConvHF2F src));
18180 format %{ "vcvtph2ps $dst,$src" %}
18181 ins_encode %{
18182 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18183 %}
18184 ins_pipe( pipe_slow );
18185 %}
18186
18187 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18188 match(Set dst (VectorCastHF2F (LoadVector mem)));
18189 format %{ "vcvtph2ps $dst,$mem" %}
18190 ins_encode %{
18191 int vlen_enc = vector_length_encoding(this);
18192 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18193 %}
18194 ins_pipe( pipe_slow );
18195 %}
18196
18197 instruct vconvHF2F(vec dst, vec src) %{
18198 match(Set dst (VectorCastHF2F src));
18199 ins_cost(125);
18200 format %{ "vector_conv_HF2F $dst,$src" %}
18201 ins_encode %{
18202 int vlen_enc = vector_length_encoding(this);
18203 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18204 %}
18205 ins_pipe( pipe_slow );
18206 %}
18207
18208 // ---------------------------------------- VectorReinterpret ------------------------------------
18209 instruct reinterpret_mask(kReg dst) %{
18210 predicate(n->bottom_type()->isa_vectmask() &&
18211 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18212 match(Set dst (VectorReinterpret dst));
18213 ins_cost(125);
18214 format %{ "vector_reinterpret $dst\t!" %}
18215 ins_encode %{
18216 // empty
18217 %}
18218 ins_pipe( pipe_slow );
18219 %}
18220
18221 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18222 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18223 n->bottom_type()->isa_vectmask() &&
18224 n->in(1)->bottom_type()->isa_vectmask() &&
18225 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
18226 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18227 match(Set dst (VectorReinterpret src));
18228 effect(TEMP xtmp);
18229 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18230 ins_encode %{
18231 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18232 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18233 assert(src_sz == dst_sz , "src and dst size mismatch");
18234 int vlen_enc = vector_length_encoding(src_sz);
18235 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18236 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18237 %}
18238 ins_pipe( pipe_slow );
18239 %}
18240
18241 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18242 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18243 n->bottom_type()->isa_vectmask() &&
18244 n->in(1)->bottom_type()->isa_vectmask() &&
18245 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18246 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
18247 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18248 match(Set dst (VectorReinterpret src));
18249 effect(TEMP xtmp);
18250 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18251 ins_encode %{
18252 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18253 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18254 assert(src_sz == dst_sz , "src and dst size mismatch");
18255 int vlen_enc = vector_length_encoding(src_sz);
18256 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18257 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18258 %}
18259 ins_pipe( pipe_slow );
18260 %}
18261
18262 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18263 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18264 n->bottom_type()->isa_vectmask() &&
18265 n->in(1)->bottom_type()->isa_vectmask() &&
18266 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18267 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
18268 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18269 match(Set dst (VectorReinterpret src));
18270 effect(TEMP xtmp);
18271 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18272 ins_encode %{
18273 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18274 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18275 assert(src_sz == dst_sz , "src and dst size mismatch");
18276 int vlen_enc = vector_length_encoding(src_sz);
18277 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18278 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18279 %}
18280 ins_pipe( pipe_slow );
18281 %}
18282
18283 instruct reinterpret(vec dst) %{
18284 predicate(!n->bottom_type()->isa_vectmask() &&
18285 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18286 match(Set dst (VectorReinterpret dst));
18287 ins_cost(125);
18288 format %{ "vector_reinterpret $dst\t!" %}
18289 ins_encode %{
18290 // empty
18291 %}
18292 ins_pipe( pipe_slow );
18293 %}
18294
18295 instruct reinterpret_expand(vec dst, vec src) %{
18296 predicate(UseAVX == 0 &&
18297 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18298 match(Set dst (VectorReinterpret src));
18299 ins_cost(125);
18300 effect(TEMP dst);
18301 format %{ "vector_reinterpret_expand $dst,$src" %}
18302 ins_encode %{
18303 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18304 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18305
18306 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18307 if (src_vlen_in_bytes == 4) {
18308 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18309 } else {
18310 assert(src_vlen_in_bytes == 8, "");
18311 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18312 }
18313 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18314 %}
18315 ins_pipe( pipe_slow );
18316 %}
18317
18318 instruct vreinterpret_expand4(legVec dst, vec src) %{
18319 predicate(UseAVX > 0 &&
18320 !n->bottom_type()->isa_vectmask() &&
18321 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18322 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18323 match(Set dst (VectorReinterpret src));
18324 ins_cost(125);
18325 format %{ "vector_reinterpret_expand $dst,$src" %}
18326 ins_encode %{
18327 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18328 %}
18329 ins_pipe( pipe_slow );
18330 %}
18331
18332
18333 instruct vreinterpret_expand(legVec dst, vec src) %{
18334 predicate(UseAVX > 0 &&
18335 !n->bottom_type()->isa_vectmask() &&
18336 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18337 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18338 match(Set dst (VectorReinterpret src));
18339 ins_cost(125);
18340 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18341 ins_encode %{
18342 switch (Matcher::vector_length_in_bytes(this, $src)) {
18343 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18344 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18345 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18346 default: ShouldNotReachHere();
18347 }
18348 %}
18349 ins_pipe( pipe_slow );
18350 %}
18351
18352 instruct reinterpret_shrink(vec dst, legVec src) %{
18353 predicate(!n->bottom_type()->isa_vectmask() &&
18354 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18355 match(Set dst (VectorReinterpret src));
18356 ins_cost(125);
18357 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18358 ins_encode %{
18359 switch (Matcher::vector_length_in_bytes(this)) {
18360 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18361 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18362 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18363 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18364 default: ShouldNotReachHere();
18365 }
18366 %}
18367 ins_pipe( pipe_slow );
18368 %}
18369
18370 // ----------------------------------------------------------------------------------------------------
18371
18372 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18373 match(Set dst (RoundDoubleMode src rmode));
18374 format %{ "roundsd $dst,$src" %}
18375 ins_cost(150);
18376 ins_encode %{
18377 assert(UseSSE >= 4, "required");
18378 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18379 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18380 }
18381 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18382 %}
18383 ins_pipe(pipe_slow);
18384 %}
18385
18386 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18387 match(Set dst (RoundDoubleMode con rmode));
18388 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18389 ins_cost(150);
18390 ins_encode %{
18391 assert(UseSSE >= 4, "required");
18392 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18393 %}
18394 ins_pipe(pipe_slow);
18395 %}
18396
18397 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18398 predicate(Matcher::vector_length(n) < 8);
18399 match(Set dst (RoundDoubleModeV src rmode));
18400 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18401 ins_encode %{
18402 assert(UseAVX > 0, "required");
18403 int vlen_enc = vector_length_encoding(this);
18404 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18405 %}
18406 ins_pipe( pipe_slow );
18407 %}
18408
18409 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18410 predicate(Matcher::vector_length(n) == 8);
18411 match(Set dst (RoundDoubleModeV src rmode));
18412 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18413 ins_encode %{
18414 assert(UseAVX > 2, "required");
18415 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18416 %}
18417 ins_pipe( pipe_slow );
18418 %}
18419
18420 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18421 predicate(Matcher::vector_length(n) < 8);
18422 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18423 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18424 ins_encode %{
18425 assert(UseAVX > 0, "required");
18426 int vlen_enc = vector_length_encoding(this);
18427 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18428 %}
18429 ins_pipe( pipe_slow );
18430 %}
18431
18432 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18433 predicate(Matcher::vector_length(n) == 8);
18434 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18435 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18436 ins_encode %{
18437 assert(UseAVX > 2, "required");
18438 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18439 %}
18440 ins_pipe( pipe_slow );
18441 %}
18442
18443 instruct onspinwait() %{
18444 match(OnSpinWait);
18445 ins_cost(200);
18446
18447 format %{
18448 $$template
18449 $$emit$$"pause\t! membar_onspinwait"
18450 %}
18451 ins_encode %{
18452 __ pause();
18453 %}
18454 ins_pipe(pipe_slow);
18455 %}
18456
18457 // a * b + c
18458 instruct fmaD_reg(regD a, regD b, regD c) %{
18459 match(Set c (FmaD c (Binary a b)));
18460 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18461 ins_cost(150);
18462 ins_encode %{
18463 assert(UseFMA, "Needs FMA instruction support.");
18464 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18465 %}
18466 ins_pipe( pipe_slow );
18467 %}
18468
18469 // a * b + c
18470 instruct fmaF_reg(regF a, regF b, regF c) %{
18471 match(Set c (FmaF c (Binary a b)));
18472 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18473 ins_cost(150);
18474 ins_encode %{
18475 assert(UseFMA, "Needs FMA instruction support.");
18476 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18477 %}
18478 ins_pipe( pipe_slow );
18479 %}
18480
18481 // ====================VECTOR INSTRUCTIONS=====================================
18482
18483 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18484 instruct MoveVec2Leg(legVec dst, vec src) %{
18485 match(Set dst src);
18486 format %{ "" %}
18487 ins_encode %{
18488 ShouldNotReachHere();
18489 %}
18490 ins_pipe( fpu_reg_reg );
18491 %}
18492
18493 instruct MoveLeg2Vec(vec dst, legVec src) %{
18494 match(Set dst src);
18495 format %{ "" %}
18496 ins_encode %{
18497 ShouldNotReachHere();
18498 %}
18499 ins_pipe( fpu_reg_reg );
18500 %}
18501
18502 // ============================================================================
18503
18504 // Load vectors generic operand pattern
18505 instruct loadV(vec dst, memory mem) %{
18506 match(Set dst (LoadVector mem));
18507 ins_cost(125);
18508 format %{ "load_vector $dst,$mem" %}
18509 ins_encode %{
18510 BasicType bt = Matcher::vector_element_basic_type(this);
18511 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18512 %}
18513 ins_pipe( pipe_slow );
18514 %}
18515
18516 // Store vectors generic operand pattern.
18517 instruct storeV(memory mem, vec src) %{
18518 match(Set mem (StoreVector mem src));
18519 ins_cost(145);
18520 format %{ "store_vector $mem,$src\n\t" %}
18521 ins_encode %{
18522 switch (Matcher::vector_length_in_bytes(this, $src)) {
18523 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18524 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18525 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18526 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18527 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18528 default: ShouldNotReachHere();
18529 }
18530 %}
18531 ins_pipe( pipe_slow );
18532 %}
18533
18534 // ---------------------------------------- Gather ------------------------------------
18535
18536 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18537
18538 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18539 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18540 Matcher::vector_length_in_bytes(n) <= 32);
18541 match(Set dst (LoadVectorGather mem idx));
18542 effect(TEMP dst, TEMP tmp, TEMP mask);
18543 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18544 ins_encode %{
18545 int vlen_enc = vector_length_encoding(this);
18546 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18547 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18548 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18549 __ lea($tmp$$Register, $mem$$Address);
18550 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18551 %}
18552 ins_pipe( pipe_slow );
18553 %}
18554
18555
18556 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18557 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18558 !is_subword_type(Matcher::vector_element_basic_type(n)));
18559 match(Set dst (LoadVectorGather mem idx));
18560 effect(TEMP dst, TEMP tmp, TEMP ktmp);
18561 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18562 ins_encode %{
18563 int vlen_enc = vector_length_encoding(this);
18564 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18565 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18566 __ lea($tmp$$Register, $mem$$Address);
18567 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18568 %}
18569 ins_pipe( pipe_slow );
18570 %}
18571
18572 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18573 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18574 !is_subword_type(Matcher::vector_element_basic_type(n)));
18575 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18576 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
18577 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18578 ins_encode %{
18579 assert(UseAVX > 2, "sanity");
18580 int vlen_enc = vector_length_encoding(this);
18581 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18582 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18583 // Note: Since the gather instruction partially updates the opmask register used
18584 // for predication, the mask operand is first moved to a temporary.
18585 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18586 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18587 __ lea($tmp$$Register, $mem$$Address);
18588 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18589 %}
18590 ins_pipe( pipe_slow );
18591 %}
18592
18593 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18594 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18595 match(Set dst (LoadVectorGather mem idx_base));
18596 effect(TEMP tmp, TEMP rtmp);
18597 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18598 ins_encode %{
18599 int vlen_enc = vector_length_encoding(this);
18600 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18601 __ lea($tmp$$Register, $mem$$Address);
18602 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18603 %}
18604 ins_pipe( pipe_slow );
18605 %}
18606
18607 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18608 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18609 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18610 match(Set dst (LoadVectorGather mem idx_base));
18611 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18612 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18613 ins_encode %{
18614 int vlen_enc = vector_length_encoding(this);
18615 int vector_len = Matcher::vector_length(this);
18616 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18617 __ lea($tmp$$Register, $mem$$Address);
18618 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18619 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18620 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18621 %}
18622 ins_pipe( pipe_slow );
18623 %}
18624
18625 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18626 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18627 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18628 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18629 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18630 ins_encode %{
18631 int vlen_enc = vector_length_encoding(this);
18632 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18633 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18634 __ lea($tmp$$Register, $mem$$Address);
18635 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18636 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18637 %}
18638 ins_pipe( pipe_slow );
18639 %}
18640
18641 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18642 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18643 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18644 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18645 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18646 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18647 ins_encode %{
18648 int vlen_enc = vector_length_encoding(this);
18649 int vector_len = Matcher::vector_length(this);
18650 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18651 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18652 __ lea($tmp$$Register, $mem$$Address);
18653 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18654 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18655 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18656 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18657 %}
18658 ins_pipe( pipe_slow );
18659 %}
18660
18661 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18662 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18663 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18664 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18665 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18666 ins_encode %{
18667 int vlen_enc = vector_length_encoding(this);
18668 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18669 __ lea($tmp$$Register, $mem$$Address);
18670 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18671 if (elem_bt == T_SHORT) {
18672 __ movl($mask_idx$$Register, 0x55555555);
18673 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18674 }
18675 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18676 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18677 %}
18678 ins_pipe( pipe_slow );
18679 %}
18680
18681 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18682 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18683 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18684 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18685 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18686 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18687 ins_encode %{
18688 int vlen_enc = vector_length_encoding(this);
18689 int vector_len = Matcher::vector_length(this);
18690 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18691 __ lea($tmp$$Register, $mem$$Address);
18692 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18693 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18694 if (elem_bt == T_SHORT) {
18695 __ movl($mask_idx$$Register, 0x55555555);
18696 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18697 }
18698 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18699 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18700 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18701 %}
18702 ins_pipe( pipe_slow );
18703 %}
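
// The masked subword gather rules above have no hardware gather to lean on
// (x86 gathers only handle dword/qword elements), so the vgather8b_masked /
// vgather_subword helpers emit a scalar load loop. Rough scalar-equivalent
// sketch of what LoadVectorGatherMasked computes for subword types
// (illustration only, not the emitted instruction sequence):
//
//   for (int i = 0; i < num_elems; i++) {
//     dst[i] = mask_bit_set(i) ? base[idx[i]] : 0;   // masked-off lanes stay zero
//   }
//
// where base is the resolved $mem address and idx supplies one offset per lane.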
18704
18705 // ====================Scatter=======================================
18706
18707 // Scatter INT, LONG, FLOAT, DOUBLE
18708
18709 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18710 predicate(UseAVX > 2);
18711 match(Set mem (StoreVectorScatter mem (Binary src idx)));
18712 effect(TEMP tmp, TEMP ktmp);
18713   format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18714 ins_encode %{
18715 int vlen_enc = vector_length_encoding(this, $src);
18716 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18717
18718 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18719 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18720
18721 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18722 __ lea($tmp$$Register, $mem$$Address);
18723 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18724 %}
18725 ins_pipe( pipe_slow );
18726 %}
18727
18728 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18729 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18730 effect(TEMP tmp, TEMP ktmp);
18731   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t! using $ktmp and $tmp as TEMP" %}
18732 ins_encode %{
18733 int vlen_enc = vector_length_encoding(this, $src);
18734 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18735 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18736 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18737     // Note: Since the scatter instruction partially updates the opmask register used
18738     // for predication, the mask operand is first copied to a temporary mask register.
18739 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18740 __ lea($tmp$$Register, $mem$$Address);
18741 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18742 %}
18743 ins_pipe( pipe_slow );
18744 %}
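
// Scalar-equivalent sketch of the two scatter rules above (illustration only,
// not the emitted instruction sequence):
//
//   for (int i = 0; i < num_elems; i++) {
//     if (mask_bit_set(i)) {        // unmasked rule behaves as if all bits are set
//       base[idx[i]] = src[i];
//     }
//   }
//
// The hardware scatter clears each opmask bit as the corresponding element is
// stored, which is why the mask is copied into $ktmp instead of being used directly.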
18745
18746 // ====================REPLICATE=======================================
18747
18748 // Replicate a byte scalar into a vector
18749 instruct vReplB_reg(vec dst, rRegI src) %{
18750 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18751 match(Set dst (Replicate src));
18752 format %{ "replicateB $dst,$src" %}
18753 ins_encode %{
18754 uint vlen = Matcher::vector_length(this);
18755 if (UseAVX >= 2) {
18756 int vlen_enc = vector_length_encoding(this);
18757 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18758 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18759 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18760 } else {
18761 __ movdl($dst$$XMMRegister, $src$$Register);
18762 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18763 }
18764 } else {
18765 assert(UseAVX < 2, "");
18766 __ movdl($dst$$XMMRegister, $src$$Register);
18767 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18768 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18769 if (vlen >= 16) {
18770 assert(vlen == 16, "");
18771 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18772 }
18773 }
18774 %}
18775 ins_pipe( pipe_slow );
18776 %}
18777
18778 instruct ReplB_mem(vec dst, memory mem) %{
18779 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18780 match(Set dst (Replicate (LoadB mem)));
18781 format %{ "replicateB $dst,$mem" %}
18782 ins_encode %{
18783 int vlen_enc = vector_length_encoding(this);
18784 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18785 %}
18786 ins_pipe( pipe_slow );
18787 %}
18788
18789 // ====================ReplicateS=======================================
18790
18791 instruct vReplS_reg(vec dst, rRegI src) %{
18792 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18793 match(Set dst (Replicate src));
18794 format %{ "replicateS $dst,$src" %}
18795 ins_encode %{
18796 uint vlen = Matcher::vector_length(this);
18797 int vlen_enc = vector_length_encoding(this);
18798 if (UseAVX >= 2) {
18799 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18800 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18801 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18802 } else {
18803 __ movdl($dst$$XMMRegister, $src$$Register);
18804 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18805 }
18806 } else {
18807 assert(UseAVX < 2, "");
18808 __ movdl($dst$$XMMRegister, $src$$Register);
18809 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18810 if (vlen >= 8) {
18811 assert(vlen == 8, "");
18812 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18813 }
18814 }
18815 %}
18816 ins_pipe( pipe_slow );
18817 %}
18818
18819 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18820 match(Set dst (Replicate con));
18821 effect(TEMP rtmp);
18822 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18823 ins_encode %{
18824 int vlen_enc = vector_length_encoding(this);
18825 BasicType bt = Matcher::vector_element_basic_type(this);
18826 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18827 __ movl($rtmp$$Register, $con$$constant);
18828 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18829 %}
18830 ins_pipe( pipe_slow );
18831 %}
18832
18833 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18834 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18835 match(Set dst (Replicate src));
18836 effect(TEMP rtmp);
18837 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18838 ins_encode %{
18839 int vlen_enc = vector_length_encoding(this);
18840 __ vmovw($rtmp$$Register, $src$$XMMRegister);
18841 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18842 %}
18843 ins_pipe( pipe_slow );
18844 %}
18845
18846 instruct ReplS_mem(vec dst, memory mem) %{
18847 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18848 match(Set dst (Replicate (LoadS mem)));
18849 format %{ "replicateS $dst,$mem" %}
18850 ins_encode %{
18851 int vlen_enc = vector_length_encoding(this);
18852 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18853 %}
18854 ins_pipe( pipe_slow );
18855 %}
18856
18857 // ====================ReplicateI=======================================
18858
18859 instruct ReplI_reg(vec dst, rRegI src) %{
18860 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18861 match(Set dst (Replicate src));
18862 format %{ "replicateI $dst,$src" %}
18863 ins_encode %{
18864 uint vlen = Matcher::vector_length(this);
18865 int vlen_enc = vector_length_encoding(this);
18866 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18867 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18868 } else if (VM_Version::supports_avx2()) {
18869 __ movdl($dst$$XMMRegister, $src$$Register);
18870 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18871 } else {
18872 __ movdl($dst$$XMMRegister, $src$$Register);
18873 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18874 }
18875 %}
18876 ins_pipe( pipe_slow );
18877 %}
18878
18879 instruct ReplI_mem(vec dst, memory mem) %{
18880 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18881 match(Set dst (Replicate (LoadI mem)));
18882 format %{ "replicateI $dst,$mem" %}
18883 ins_encode %{
18884 int vlen_enc = vector_length_encoding(this);
18885 if (VM_Version::supports_avx2()) {
18886 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18887 } else if (VM_Version::supports_avx()) {
18888 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18889 } else {
18890 __ movdl($dst$$XMMRegister, $mem$$Address);
18891 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18892 }
18893 %}
18894 ins_pipe( pipe_slow );
18895 %}
18896
18897 instruct ReplI_imm(vec dst, immI con) %{
18898 predicate(Matcher::is_non_long_integral_vector(n));
18899 match(Set dst (Replicate con));
18900 format %{ "replicateI $dst,$con" %}
18901 ins_encode %{
18902 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18903 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18904 type2aelembytes(Matcher::vector_element_basic_type(this))));
18905 BasicType bt = Matcher::vector_element_basic_type(this);
18906 int vlen = Matcher::vector_length_in_bytes(this);
18907 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18908 %}
18909 ins_pipe( pipe_slow );
18910 %}
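
// The replication count passed to vreplicate_imm() above sizes the constant-table
// entry to the smallest granule load_constant_vector() can broadcast from:
// 4 bytes with AVX (vbroadcastss), 8 bytes with SSE3 (movddup), 16 bytes otherwise.
// Worked example (assuming an AVX-capable CPU): a T_SHORT immediate yields
// 4 / type2aelembytes(T_SHORT) = 4 / 2 = 2 copies of the 16-bit constant, i.e. one
// 4-byte granule, which is then broadcast to the full vector width.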
18911
18912 // Replicate a scalar zero into a vector
18913 instruct ReplI_zero(vec dst, immI_0 zero) %{
18914 predicate(Matcher::is_non_long_integral_vector(n));
18915 match(Set dst (Replicate zero));
18916 format %{ "replicateI $dst,$zero" %}
18917 ins_encode %{
18918 int vlen_enc = vector_length_encoding(this);
18919 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18920 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18921 } else {
18922 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18923 }
18924 %}
18925 ins_pipe( fpu_reg_reg );
18926 %}
18927
18928 instruct ReplI_M1(vec dst, immI_M1 con) %{
18929 predicate(Matcher::is_non_long_integral_vector(n));
18930 match(Set dst (Replicate con));
18931 format %{ "vallones $dst" %}
18932 ins_encode %{
18933 int vector_len = vector_length_encoding(this);
18934 __ vallones($dst$$XMMRegister, vector_len);
18935 %}
18936 ins_pipe( pipe_slow );
18937 %}
18938
18939 // ====================ReplicateL=======================================
18940
18941 // Replicate a long (8-byte) scalar into a vector
18942 instruct ReplL_reg(vec dst, rRegL src) %{
18943 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18944 match(Set dst (Replicate src));
18945 format %{ "replicateL $dst,$src" %}
18946 ins_encode %{
18947 int vlen = Matcher::vector_length(this);
18948 int vlen_enc = vector_length_encoding(this);
18949 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18950 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18951 } else if (VM_Version::supports_avx2()) {
18952 __ movdq($dst$$XMMRegister, $src$$Register);
18953 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18954 } else {
18955 __ movdq($dst$$XMMRegister, $src$$Register);
18956 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18957 }
18958 %}
18959 ins_pipe( pipe_slow );
18960 %}
18961
18962 instruct ReplL_mem(vec dst, memory mem) %{
18963 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18964 match(Set dst (Replicate (LoadL mem)));
18965 format %{ "replicateL $dst,$mem" %}
18966 ins_encode %{
18967 int vlen_enc = vector_length_encoding(this);
18968 if (VM_Version::supports_avx2()) {
18969 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18970 } else if (VM_Version::supports_sse3()) {
18971 __ movddup($dst$$XMMRegister, $mem$$Address);
18972 } else {
18973 __ movq($dst$$XMMRegister, $mem$$Address);
18974 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18975 }
18976 %}
18977 ins_pipe( pipe_slow );
18978 %}
18979
18980 // Replicate a long (8-byte) scalar immediate into a vector by loading it from the constant table.
18981 instruct ReplL_imm(vec dst, immL con) %{
18982 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18983 match(Set dst (Replicate con));
18984 format %{ "replicateL $dst,$con" %}
18985 ins_encode %{
18986 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18987 int vlen = Matcher::vector_length_in_bytes(this);
18988 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18989 %}
18990 ins_pipe( pipe_slow );
18991 %}
18992
18993 instruct ReplL_zero(vec dst, immL0 zero) %{
18994 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18995 match(Set dst (Replicate zero));
18996 format %{ "replicateL $dst,$zero" %}
18997 ins_encode %{
18998 int vlen_enc = vector_length_encoding(this);
18999 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19000 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19001 } else {
19002 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19003 }
19004 %}
19005 ins_pipe( fpu_reg_reg );
19006 %}
19007
19008 instruct ReplL_M1(vec dst, immL_M1 con) %{
19009 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19010 match(Set dst (Replicate con));
19011 format %{ "vallones $dst" %}
19012 ins_encode %{
19013 int vector_len = vector_length_encoding(this);
19014 __ vallones($dst$$XMMRegister, vector_len);
19015 %}
19016 ins_pipe( pipe_slow );
19017 %}
19018
19019 // ====================ReplicateF=======================================
19020
19021 instruct vReplF_reg(vec dst, vlRegF src) %{
19022 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19023 match(Set dst (Replicate src));
19024 format %{ "replicateF $dst,$src" %}
19025 ins_encode %{
19026 uint vlen = Matcher::vector_length(this);
19027 int vlen_enc = vector_length_encoding(this);
19028 if (vlen <= 4) {
19029 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19030 } else if (VM_Version::supports_avx2()) {
19031 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19032 } else {
19033 assert(vlen == 8, "sanity");
19034 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19035 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19036 }
19037 %}
19038 ins_pipe( pipe_slow );
19039 %}
19040
19041 instruct ReplF_reg(vec dst, vlRegF src) %{
19042 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19043 match(Set dst (Replicate src));
19044 format %{ "replicateF $dst,$src" %}
19045 ins_encode %{
19046 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19047 %}
19048 ins_pipe( pipe_slow );
19049 %}
19050
19051 instruct ReplF_mem(vec dst, memory mem) %{
19052 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19053 match(Set dst (Replicate (LoadF mem)));
19054 format %{ "replicateF $dst,$mem" %}
19055 ins_encode %{
19056 int vlen_enc = vector_length_encoding(this);
19057 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19058 %}
19059 ins_pipe( pipe_slow );
19060 %}
19061
19062 // Replicate a float scalar immediate into a vector by loading it from the constant table.
19063 instruct ReplF_imm(vec dst, immF con) %{
19064 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19065 match(Set dst (Replicate con));
19066 format %{ "replicateF $dst,$con" %}
19067 ins_encode %{
19068 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19069 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19070 int vlen = Matcher::vector_length_in_bytes(this);
19071 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19072 %}
19073 ins_pipe( pipe_slow );
19074 %}
19075
19076 instruct ReplF_zero(vec dst, immF0 zero) %{
19077 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19078 match(Set dst (Replicate zero));
19079 format %{ "replicateF $dst,$zero" %}
19080 ins_encode %{
19081 int vlen_enc = vector_length_encoding(this);
19082 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19083 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19084 } else {
19085 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19086 }
19087 %}
19088 ins_pipe( fpu_reg_reg );
19089 %}
19090
19091 // ====================ReplicateD=======================================
19092
19093 // Replicate a double (8-byte) scalar into a vector
19094 instruct vReplD_reg(vec dst, vlRegD src) %{
19095 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19096 match(Set dst (Replicate src));
19097 format %{ "replicateD $dst,$src" %}
19098 ins_encode %{
19099 uint vlen = Matcher::vector_length(this);
19100 int vlen_enc = vector_length_encoding(this);
19101 if (vlen <= 2) {
19102 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19103 } else if (VM_Version::supports_avx2()) {
19104 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19105 } else {
19106 assert(vlen == 4, "sanity");
19107 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19108 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19109 }
19110 %}
19111 ins_pipe( pipe_slow );
19112 %}
19113
19114 instruct ReplD_reg(vec dst, vlRegD src) %{
19115 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19116 match(Set dst (Replicate src));
19117 format %{ "replicateD $dst,$src" %}
19118 ins_encode %{
19119 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19120 %}
19121 ins_pipe( pipe_slow );
19122 %}
19123
19124 instruct ReplD_mem(vec dst, memory mem) %{
19125 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19126 match(Set dst (Replicate (LoadD mem)));
19127 format %{ "replicateD $dst,$mem" %}
19128 ins_encode %{
19129 if (Matcher::vector_length(this) >= 4) {
19130 int vlen_enc = vector_length_encoding(this);
19131 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19132 } else {
19133 __ movddup($dst$$XMMRegister, $mem$$Address);
19134 }
19135 %}
19136 ins_pipe( pipe_slow );
19137 %}
19138
19139 // Replicate a double (8-byte) scalar immediate into a vector by loading it from the constant table.
19140 instruct ReplD_imm(vec dst, immD con) %{
19141 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19142 match(Set dst (Replicate con));
19143 format %{ "replicateD $dst,$con" %}
19144 ins_encode %{
19145 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19146 int vlen = Matcher::vector_length_in_bytes(this);
19147 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19148 %}
19149 ins_pipe( pipe_slow );
19150 %}
19151
19152 instruct ReplD_zero(vec dst, immD0 zero) %{
19153 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19154 match(Set dst (Replicate zero));
19155 format %{ "replicateD $dst,$zero" %}
19156 ins_encode %{
19157 int vlen_enc = vector_length_encoding(this);
19158 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19159 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19160 } else {
19161 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19162 }
19163 %}
19164 ins_pipe( fpu_reg_reg );
19165 %}
19166
19167 // ====================VECTOR INSERT=======================================
19168
19169 instruct insert(vec dst, rRegI val, immU8 idx) %{
19170 predicate(Matcher::vector_length_in_bytes(n) < 32);
19171 match(Set dst (VectorInsert (Binary dst val) idx));
19172 format %{ "vector_insert $dst,$val,$idx" %}
19173 ins_encode %{
19174 assert(UseSSE >= 4, "required");
19175 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19176
19177 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19178
19179 assert(is_integral_type(elem_bt), "");
19180 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19181
19182 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19183 %}
19184 ins_pipe( pipe_slow );
19185 %}
19186
19187 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19188 predicate(Matcher::vector_length_in_bytes(n) == 32);
19189 match(Set dst (VectorInsert (Binary src val) idx));
19190 effect(TEMP vtmp);
19191 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19192 ins_encode %{
19193 int vlen_enc = Assembler::AVX_256bit;
19194 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19195 int elem_per_lane = 16/type2aelembytes(elem_bt);
19196 int log2epr = log2(elem_per_lane);
19197
19198 assert(is_integral_type(elem_bt), "sanity");
19199 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19200
19201 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19202 uint y_idx = ($idx$$constant >> log2epr) & 1;
19203 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19204 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19205 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19206 %}
19207 ins_pipe( pipe_slow );
19208 %}
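
// Worked example of the lane split above (illustration only): for a 256-bit
// vector of shorts, elem_per_lane = 16/2 = 8 and log2epr = 3. Inserting at
// idx = 11 gives x_idx = 11 & 7 = 3 and y_idx = (11 >> 3) & 1 = 1, so the upper
// 128-bit lane is extracted, the scalar is inserted at element 3 of that lane,
// and the updated lane is written back into $dst.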
19209
19210 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19211 predicate(Matcher::vector_length_in_bytes(n) == 64);
19212 match(Set dst (VectorInsert (Binary src val) idx));
19213 effect(TEMP vtmp);
19214 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19215 ins_encode %{
19216 assert(UseAVX > 2, "sanity");
19217
19218 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19219 int elem_per_lane = 16/type2aelembytes(elem_bt);
19220 int log2epr = log2(elem_per_lane);
19221
19222 assert(is_integral_type(elem_bt), "");
19223 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19224
19225 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19226 uint y_idx = ($idx$$constant >> log2epr) & 3;
19227 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19228 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19229 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19230 %}
19231 ins_pipe( pipe_slow );
19232 %}
19233
19234 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19235 predicate(Matcher::vector_length(n) == 2);
19236 match(Set dst (VectorInsert (Binary dst val) idx));
19237 format %{ "vector_insert $dst,$val,$idx" %}
19238 ins_encode %{
19239 assert(UseSSE >= 4, "required");
19240 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19241 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19242
19243 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19244 %}
19245 ins_pipe( pipe_slow );
19246 %}
19247
19248 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19249 predicate(Matcher::vector_length(n) == 4);
19250 match(Set dst (VectorInsert (Binary src val) idx));
19251 effect(TEMP vtmp);
19252 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19253 ins_encode %{
19254 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19255 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19256
19257 uint x_idx = $idx$$constant & right_n_bits(1);
19258 uint y_idx = ($idx$$constant >> 1) & 1;
19259 int vlen_enc = Assembler::AVX_256bit;
19260 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19261 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19262 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19263 %}
19264 ins_pipe( pipe_slow );
19265 %}
19266
19267 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19268 predicate(Matcher::vector_length(n) == 8);
19269 match(Set dst (VectorInsert (Binary src val) idx));
19270 effect(TEMP vtmp);
19271 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19272 ins_encode %{
19273 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19274 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19275
19276 uint x_idx = $idx$$constant & right_n_bits(1);
19277 uint y_idx = ($idx$$constant >> 1) & 3;
19278 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19279 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19280 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19281 %}
19282 ins_pipe( pipe_slow );
19283 %}
19284
19285 instruct insertF(vec dst, regF val, immU8 idx) %{
19286 predicate(Matcher::vector_length(n) < 8);
19287 match(Set dst (VectorInsert (Binary dst val) idx));
19288 format %{ "vector_insert $dst,$val,$idx" %}
19289 ins_encode %{
19290 assert(UseSSE >= 4, "sanity");
19291
19292 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19293 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19294
19295 uint x_idx = $idx$$constant & right_n_bits(2);
19296 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19297 %}
19298 ins_pipe( pipe_slow );
19299 %}
19300
19301 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19302 predicate(Matcher::vector_length(n) >= 8);
19303 match(Set dst (VectorInsert (Binary src val) idx));
19304 effect(TEMP vtmp);
19305 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19306 ins_encode %{
19307 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19308 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19309
19310 int vlen = Matcher::vector_length(this);
19311 uint x_idx = $idx$$constant & right_n_bits(2);
19312 if (vlen == 8) {
19313 uint y_idx = ($idx$$constant >> 2) & 1;
19314 int vlen_enc = Assembler::AVX_256bit;
19315 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19316 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19317 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19318 } else {
19319 assert(vlen == 16, "sanity");
19320 uint y_idx = ($idx$$constant >> 2) & 3;
19321 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19322 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19323 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19324 }
19325 %}
19326 ins_pipe( pipe_slow );
19327 %}
19328
19329 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19330 predicate(Matcher::vector_length(n) == 2);
19331 match(Set dst (VectorInsert (Binary dst val) idx));
19332 effect(TEMP tmp);
19333 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19334 ins_encode %{
19335 assert(UseSSE >= 4, "sanity");
19336 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19337 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19338
19339 __ movq($tmp$$Register, $val$$XMMRegister);
19340 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19341 %}
19342 ins_pipe( pipe_slow );
19343 %}
19344
19345 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19346 predicate(Matcher::vector_length(n) == 4);
19347 match(Set dst (VectorInsert (Binary src val) idx));
19348 effect(TEMP vtmp, TEMP tmp);
19349 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19350 ins_encode %{
19351 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19352 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19353
19354 uint x_idx = $idx$$constant & right_n_bits(1);
19355 uint y_idx = ($idx$$constant >> 1) & 1;
19356 int vlen_enc = Assembler::AVX_256bit;
19357 __ movq($tmp$$Register, $val$$XMMRegister);
19358 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19359 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19360 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19361 %}
19362 ins_pipe( pipe_slow );
19363 %}
19364
19365 instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19366 predicate(Matcher::vector_length(n) == 8);
19367 match(Set dst (VectorInsert (Binary src val) idx));
19368 effect(TEMP tmp, TEMP vtmp);
19369   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19370 ins_encode %{
19371 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19372 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19373
19374 uint x_idx = $idx$$constant & right_n_bits(1);
19375 uint y_idx = ($idx$$constant >> 1) & 3;
19376 __ movq($tmp$$Register, $val$$XMMRegister);
19377 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19378 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19379 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19380 %}
19381 ins_pipe( pipe_slow );
19382 %}
19383
19384 // ====================REDUCTION ARITHMETIC=======================================
19385
19386 // =======================Int Reduction==========================================
19387
19388 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19389 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19390 match(Set dst (AddReductionVI src1 src2));
19391 match(Set dst (MulReductionVI src1 src2));
19392 match(Set dst (AndReductionV src1 src2));
19393 match(Set dst ( OrReductionV src1 src2));
19394 match(Set dst (XorReductionV src1 src2));
19395 match(Set dst (MinReductionV src1 src2));
19396 match(Set dst (MaxReductionV src1 src2));
19397 match(Set dst (UMinReductionV src1 src2));
19398 match(Set dst (UMaxReductionV src1 src2));
19399 effect(TEMP vtmp1, TEMP vtmp2);
19400 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19401 ins_encode %{
19402 int opcode = this->ideal_Opcode();
19403 int vlen = Matcher::vector_length(this, $src2);
19404 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19405 %}
19406 ins_pipe( pipe_slow );
19407 %}
19408
19409 // =======================Long Reduction==========================================
19410
19411 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19412 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19413 match(Set dst (AddReductionVL src1 src2));
19414 match(Set dst (MulReductionVL src1 src2));
19415 match(Set dst (AndReductionV src1 src2));
19416 match(Set dst ( OrReductionV src1 src2));
19417 match(Set dst (XorReductionV src1 src2));
19418 match(Set dst (MinReductionV src1 src2));
19419 match(Set dst (MaxReductionV src1 src2));
19420 match(Set dst (UMinReductionV src1 src2));
19421 match(Set dst (UMaxReductionV src1 src2));
19422 effect(TEMP vtmp1, TEMP vtmp2);
19423 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19424 ins_encode %{
19425 int opcode = this->ideal_Opcode();
19426 int vlen = Matcher::vector_length(this, $src2);
19427 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19428 %}
19429 ins_pipe( pipe_slow );
19430 %}
19431
19432 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19433 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19434 match(Set dst (AddReductionVL src1 src2));
19435 match(Set dst (MulReductionVL src1 src2));
19436 match(Set dst (AndReductionV src1 src2));
19437 match(Set dst ( OrReductionV src1 src2));
19438 match(Set dst (XorReductionV src1 src2));
19439 match(Set dst (MinReductionV src1 src2));
19440 match(Set dst (MaxReductionV src1 src2));
19441 match(Set dst (UMinReductionV src1 src2));
19442 match(Set dst (UMaxReductionV src1 src2));
19443 effect(TEMP vtmp1, TEMP vtmp2);
19444 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19445 ins_encode %{
19446 int opcode = this->ideal_Opcode();
19447 int vlen = Matcher::vector_length(this, $src2);
19448 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19449 %}
19450 ins_pipe( pipe_slow );
19451 %}
19452
19453 // =======================Float Reduction==========================================
19454
19455 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19456 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19457 match(Set dst (AddReductionVF dst src));
19458 match(Set dst (MulReductionVF dst src));
19459 effect(TEMP dst, TEMP vtmp);
19460 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19461 ins_encode %{
19462 int opcode = this->ideal_Opcode();
19463 int vlen = Matcher::vector_length(this, $src);
19464 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19465 %}
19466 ins_pipe( pipe_slow );
19467 %}
19468
19469 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19470 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19471 match(Set dst (AddReductionVF dst src));
19472 match(Set dst (MulReductionVF dst src));
19473 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19474 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19475 ins_encode %{
19476 int opcode = this->ideal_Opcode();
19477 int vlen = Matcher::vector_length(this, $src);
19478 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19479 %}
19480 ins_pipe( pipe_slow );
19481 %}
19482
19483 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19484 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19485 match(Set dst (AddReductionVF dst src));
19486 match(Set dst (MulReductionVF dst src));
19487 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19488 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19489 ins_encode %{
19490 int opcode = this->ideal_Opcode();
19491 int vlen = Matcher::vector_length(this, $src);
19492 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19493 %}
19494 ins_pipe( pipe_slow );
19495 %}
19496
19497
19498 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19499 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19500 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19501 // src1 contains reduction identity
19502 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19503 match(Set dst (AddReductionVF src1 src2));
19504 match(Set dst (MulReductionVF src1 src2));
19505 effect(TEMP dst);
19506 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19507 ins_encode %{
19508 int opcode = this->ideal_Opcode();
19509 int vlen = Matcher::vector_length(this, $src2);
19510 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19511 %}
19512 ins_pipe( pipe_slow );
19513 %}
19514
19515 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19516 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19517 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19518 // src1 contains reduction identity
19519 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19520 match(Set dst (AddReductionVF src1 src2));
19521 match(Set dst (MulReductionVF src1 src2));
19522 effect(TEMP dst, TEMP vtmp);
19523 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19524 ins_encode %{
19525 int opcode = this->ideal_Opcode();
19526 int vlen = Matcher::vector_length(this, $src2);
19527 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19528 %}
19529 ins_pipe( pipe_slow );
19530 %}
19531
19532 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19533 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19534 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19535 // src1 contains reduction identity
19536 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19537 match(Set dst (AddReductionVF src1 src2));
19538 match(Set dst (MulReductionVF src1 src2));
19539 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19540 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19541 ins_encode %{
19542 int opcode = this->ideal_Opcode();
19543 int vlen = Matcher::vector_length(this, $src2);
19544 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19545 %}
19546 ins_pipe( pipe_slow );
19547 %}
19548
19549 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19550 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19551 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19552 // src1 contains reduction identity
19553 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19554 match(Set dst (AddReductionVF src1 src2));
19555 match(Set dst (MulReductionVF src1 src2));
19556 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19557 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19558 ins_encode %{
19559 int opcode = this->ideal_Opcode();
19560 int vlen = Matcher::vector_length(this, $src2);
19561 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19562 %}
19563 ins_pipe( pipe_slow );
19564 %}
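
// The strictly-ordered float rules above keep the accumulator live in $dst and
// reduce sequentially, matching Java's scalar loop semantics:
//   r = dst; for (i) r = op(r, src[i]);
// The unordered rules may reassociate, e.g. reduce lanes pairwise first and fold
// in the identity from $src1 at the end, which is why they only take an identity
// value instead of a live accumulator. (Sketch of the semantics only, not of the
// emitted instruction sequence.)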
19565
19566 // =======================Double Reduction==========================================
19567
19568 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19569 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19570 match(Set dst (AddReductionVD dst src));
19571 match(Set dst (MulReductionVD dst src));
19572 effect(TEMP dst, TEMP vtmp);
19573 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19574 ins_encode %{
19575 int opcode = this->ideal_Opcode();
19576 int vlen = Matcher::vector_length(this, $src);
19577 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19578 %}
19579 ins_pipe( pipe_slow );
19580 %}
19581
19582 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19583 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19584 match(Set dst (AddReductionVD dst src));
19585 match(Set dst (MulReductionVD dst src));
19586 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19587 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19588 ins_encode %{
19589 int opcode = this->ideal_Opcode();
19590 int vlen = Matcher::vector_length(this, $src);
19591 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19592 %}
19593 ins_pipe( pipe_slow );
19594 %}
19595
19596 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19597 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19598 match(Set dst (AddReductionVD dst src));
19599 match(Set dst (MulReductionVD dst src));
19600 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19601 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19602 ins_encode %{
19603 int opcode = this->ideal_Opcode();
19604 int vlen = Matcher::vector_length(this, $src);
19605 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19606 %}
19607 ins_pipe( pipe_slow );
19608 %}
19609
19610 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19611 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19612 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19613 // src1 contains reduction identity
19614 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19615 match(Set dst (AddReductionVD src1 src2));
19616 match(Set dst (MulReductionVD src1 src2));
19617 effect(TEMP dst);
19618 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19619 ins_encode %{
19620 int opcode = this->ideal_Opcode();
19621 int vlen = Matcher::vector_length(this, $src2);
19622 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19623 %}
19624 ins_pipe( pipe_slow );
19625 %}
19626
19627 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19628 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19629 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19630 // src1 contains reduction identity
19631 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19632 match(Set dst (AddReductionVD src1 src2));
19633 match(Set dst (MulReductionVD src1 src2));
19634 effect(TEMP dst, TEMP vtmp);
19635 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19636 ins_encode %{
19637 int opcode = this->ideal_Opcode();
19638 int vlen = Matcher::vector_length(this, $src2);
19639 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19640 %}
19641 ins_pipe( pipe_slow );
19642 %}
19643
19644 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19645 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19646 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19647 // src1 contains reduction identity
19648 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19649 match(Set dst (AddReductionVD src1 src2));
19650 match(Set dst (MulReductionVD src1 src2));
19651 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19652 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19653 ins_encode %{
19654 int opcode = this->ideal_Opcode();
19655 int vlen = Matcher::vector_length(this, $src2);
19656 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19657 %}
19658 ins_pipe( pipe_slow );
19659 %}
19660
19661 // =======================Byte Reduction==========================================
19662
19663 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19664 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19665 match(Set dst (AddReductionVI src1 src2));
19666 match(Set dst (AndReductionV src1 src2));
19667 match(Set dst ( OrReductionV src1 src2));
19668 match(Set dst (XorReductionV src1 src2));
19669 match(Set dst (MinReductionV src1 src2));
19670 match(Set dst (MaxReductionV src1 src2));
19671 match(Set dst (UMinReductionV src1 src2));
19672 match(Set dst (UMaxReductionV src1 src2));
19673 effect(TEMP vtmp1, TEMP vtmp2);
19674 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19675 ins_encode %{
19676 int opcode = this->ideal_Opcode();
19677 int vlen = Matcher::vector_length(this, $src2);
19678 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19679 %}
19680 ins_pipe( pipe_slow );
19681 %}
19682
19683 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19684 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19685 match(Set dst (AddReductionVI src1 src2));
19686 match(Set dst (AndReductionV src1 src2));
19687 match(Set dst ( OrReductionV src1 src2));
19688 match(Set dst (XorReductionV src1 src2));
19689 match(Set dst (MinReductionV src1 src2));
19690 match(Set dst (MaxReductionV src1 src2));
19691 match(Set dst (UMinReductionV src1 src2));
19692 match(Set dst (UMaxReductionV src1 src2));
19693 effect(TEMP vtmp1, TEMP vtmp2);
19694 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19695 ins_encode %{
19696 int opcode = this->ideal_Opcode();
19697 int vlen = Matcher::vector_length(this, $src2);
19698 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19699 %}
19700 ins_pipe( pipe_slow );
19701 %}
19702
19703 // =======================Short Reduction==========================================
19704
19705 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19706 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19707 match(Set dst (AddReductionVI src1 src2));
19708 match(Set dst (MulReductionVI src1 src2));
19709 match(Set dst (AndReductionV src1 src2));
19710 match(Set dst ( OrReductionV src1 src2));
19711 match(Set dst (XorReductionV src1 src2));
19712 match(Set dst (MinReductionV src1 src2));
19713 match(Set dst (MaxReductionV src1 src2));
19714 match(Set dst (UMinReductionV src1 src2));
19715 match(Set dst (UMaxReductionV src1 src2));
19716 effect(TEMP vtmp1, TEMP vtmp2);
19717 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19718 ins_encode %{
19719 int opcode = this->ideal_Opcode();
19720 int vlen = Matcher::vector_length(this, $src2);
19721 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19722 %}
19723 ins_pipe( pipe_slow );
19724 %}
19725
19726 // =======================Mul Reduction==========================================
19727
19728 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19729 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19730 Matcher::vector_length(n->in(2)) <= 32); // src2
19731 match(Set dst (MulReductionVI src1 src2));
19732 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19733 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19734 ins_encode %{
19735 int opcode = this->ideal_Opcode();
19736 int vlen = Matcher::vector_length(this, $src2);
19737 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19738 %}
19739 ins_pipe( pipe_slow );
19740 %}
19741
19742 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19743 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19744 Matcher::vector_length(n->in(2)) == 64); // src2
19745 match(Set dst (MulReductionVI src1 src2));
19746 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19747 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19748 ins_encode %{
19749 int opcode = this->ideal_Opcode();
19750 int vlen = Matcher::vector_length(this, $src2);
19751 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19752 %}
19753 ins_pipe( pipe_slow );
19754 %}
19755
19756 //--------------------Min/Max Float Reduction --------------------
19757 // Float Min/Max Reduction
19758 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19759 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19760 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19761 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19762 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19763 Matcher::vector_length(n->in(2)) == 2);
19764 match(Set dst (MinReductionV src1 src2));
19765 match(Set dst (MaxReductionV src1 src2));
19766 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19767 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19768 ins_encode %{
19769 assert(UseAVX > 0, "sanity");
19770
19771 int opcode = this->ideal_Opcode();
19772 int vlen = Matcher::vector_length(this, $src2);
19773 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19774 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19775 %}
19776 ins_pipe( pipe_slow );
19777 %}
19778
19779 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19780 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19781 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19782 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19783 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19784 Matcher::vector_length(n->in(2)) >= 4);
19785 match(Set dst (MinReductionV src1 src2));
19786 match(Set dst (MaxReductionV src1 src2));
19787 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19788 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19789 ins_encode %{
19790 assert(UseAVX > 0, "sanity");
19791
19792 int opcode = this->ideal_Opcode();
19793 int vlen = Matcher::vector_length(this, $src2);
19794 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19795 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19796 %}
19797 ins_pipe( pipe_slow );
19798 %}
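
// The identity checks in the predicates above (src1 == +Inf for MinReductionV,
// src1 == -Inf for MaxReductionV) mean the scalar input is the neutral element
// of the operation and can be dropped, so only $src2 is reduced. When src1 is
// not a known identity, the *_av variants below match instead and fold the live
// accumulator in $dst into the reduction.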
19799
19800 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19801 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19802 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19803 Matcher::vector_length(n->in(2)) == 2);
19804 match(Set dst (MinReductionV dst src));
19805 match(Set dst (MaxReductionV dst src));
19806 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19807 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19808 ins_encode %{
19809 assert(UseAVX > 0, "sanity");
19810
19811 int opcode = this->ideal_Opcode();
19812 int vlen = Matcher::vector_length(this, $src);
19813 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19814 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19815 %}
19816 ins_pipe( pipe_slow );
19817 %}
19818
19819
19820 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19821 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19822 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19823 Matcher::vector_length(n->in(2)) >= 4);
19824 match(Set dst (MinReductionV dst src));
19825 match(Set dst (MaxReductionV dst src));
19826 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19827 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19828 ins_encode %{
19829 assert(UseAVX > 0, "sanity");
19830
19831 int opcode = this->ideal_Opcode();
19832 int vlen = Matcher::vector_length(this, $src);
19833 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19834 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19835 %}
19836 ins_pipe( pipe_slow );
19837 %}
19838
19839 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19840 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19841 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19842 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19843 Matcher::vector_length(n->in(2)) == 2);
19844 match(Set dst (MinReductionV src1 src2));
19845 match(Set dst (MaxReductionV src1 src2));
19846 effect(TEMP dst, TEMP xtmp1);
19847 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19848 ins_encode %{
19849 int opcode = this->ideal_Opcode();
19850 int vlen = Matcher::vector_length(this, $src2);
19851 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19852 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19853 %}
19854 ins_pipe( pipe_slow );
19855 %}
19856
19857 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19858 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19859 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19860 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19861 Matcher::vector_length(n->in(2)) >= 4);
19862 match(Set dst (MinReductionV src1 src2));
19863 match(Set dst (MaxReductionV src1 src2));
19864 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19865 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19866 ins_encode %{
19867 int opcode = this->ideal_Opcode();
19868 int vlen = Matcher::vector_length(this, $src2);
19869 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19870 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19871 %}
19872 ins_pipe( pipe_slow );
19873 %}
19874
19875 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
19876 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19877 Matcher::vector_length(n->in(2)) == 2);
19878 match(Set dst (MinReductionV dst src));
19879 match(Set dst (MaxReductionV dst src));
19880 effect(TEMP dst, TEMP xtmp1);
19881 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19882 ins_encode %{
19883 int opcode = this->ideal_Opcode();
19884 int vlen = Matcher::vector_length(this, $src);
19885 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19886 $xtmp1$$XMMRegister);
19887 %}
19888 ins_pipe( pipe_slow );
19889 %}
19890
19891 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19892 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19893 Matcher::vector_length(n->in(2)) >= 4);
19894 match(Set dst (MinReductionV dst src));
19895 match(Set dst (MaxReductionV dst src));
19896 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19897   format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19898 ins_encode %{
19899 int opcode = this->ideal_Opcode();
19900 int vlen = Matcher::vector_length(this, $src);
19901 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19902 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19903 %}
19904 ins_pipe( pipe_slow );
19905 %}
19906
19907 //--------------------Min/Max Double Reduction --------------------
19908 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19909 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19910 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19911 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19912 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19913 Matcher::vector_length(n->in(2)) == 2);
19914 match(Set dst (MinReductionV src1 src2));
19915 match(Set dst (MaxReductionV src1 src2));
19916 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19917 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19918 ins_encode %{
19919 assert(UseAVX > 0, "sanity");
19920
19921 int opcode = this->ideal_Opcode();
19922 int vlen = Matcher::vector_length(this, $src2);
19923 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19924 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19925 %}
19926 ins_pipe( pipe_slow );
19927 %}
19928
19929 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19930 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19931 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19932 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19933 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19934 Matcher::vector_length(n->in(2)) >= 4);
19935 match(Set dst (MinReductionV src1 src2));
19936 match(Set dst (MaxReductionV src1 src2));
19937 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19938 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19939 ins_encode %{
19940 assert(UseAVX > 0, "sanity");
19941
19942 int opcode = this->ideal_Opcode();
19943 int vlen = Matcher::vector_length(this, $src2);
19944 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19945 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19946 %}
19947 ins_pipe( pipe_slow );
19948 %}
19949
19950
19951 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19952 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19953 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19954 Matcher::vector_length(n->in(2)) == 2);
19955 match(Set dst (MinReductionV dst src));
19956 match(Set dst (MaxReductionV dst src));
19957 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19958 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19959 ins_encode %{
19960 assert(UseAVX > 0, "sanity");
19961
19962 int opcode = this->ideal_Opcode();
19963 int vlen = Matcher::vector_length(this, $src);
19964 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19965 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19966 %}
19967 ins_pipe( pipe_slow );
19968 %}
19969
19970 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19971 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19972 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19973 Matcher::vector_length(n->in(2)) >= 4);
19974 match(Set dst (MinReductionV dst src));
19975 match(Set dst (MaxReductionV dst src));
19976 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19977 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19978 ins_encode %{
19979 assert(UseAVX > 0, "sanity");
19980
19981 int opcode = this->ideal_Opcode();
19982 int vlen = Matcher::vector_length(this, $src);
19983 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19984 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19985 %}
19986 ins_pipe( pipe_slow );
19987 %}
19988
19989 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
19990 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19991 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19992 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19993 Matcher::vector_length(n->in(2)) == 2);
19994 match(Set dst (MinReductionV src1 src2));
19995 match(Set dst (MaxReductionV src1 src2));
19996 effect(TEMP dst, TEMP xtmp1);
19997 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
19998 ins_encode %{
19999 int opcode = this->ideal_Opcode();
20000 int vlen = Matcher::vector_length(this, $src2);
20001 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20002 xnoreg, xnoreg, $xtmp1$$XMMRegister);
20003 %}
20004 ins_pipe( pipe_slow );
20005 %}
20006
20007 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20008 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20009 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20010 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20011 Matcher::vector_length(n->in(2)) >= 4);
20012 match(Set dst (MinReductionV src1 src2));
20013 match(Set dst (MaxReductionV src1 src2));
20014 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20015 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20016 ins_encode %{
20017 int opcode = this->ideal_Opcode();
20018 int vlen = Matcher::vector_length(this, $src2);
20019 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20020 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20021 %}
20022 ins_pipe( pipe_slow );
20023 %}
20024
20025
20026 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20027 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20028 Matcher::vector_length(n->in(2)) == 2);
20029 match(Set dst (MinReductionV dst src));
20030 match(Set dst (MaxReductionV dst src));
20031 effect(TEMP dst, TEMP xtmp1);
20032 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20033 ins_encode %{
20034 int opcode = this->ideal_Opcode();
20035 int vlen = Matcher::vector_length(this, $src);
20036 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20037 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20038 %}
20039 ins_pipe( pipe_slow );
20040 %}
20041
20042 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20043 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20044 Matcher::vector_length(n->in(2)) >= 4);
20045 match(Set dst (MinReductionV dst src));
20046 match(Set dst (MaxReductionV dst src));
20047 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20048 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20049 ins_encode %{
20050 int opcode = this->ideal_Opcode();
20051 int vlen = Matcher::vector_length(this, $src);
20052 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20053 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20054 %}
20055 ins_pipe( pipe_slow );
20056 %}
20057
20058 // ====================VECTOR ARITHMETIC=======================================
20059
20060 // --------------------------------- ADD --------------------------------------
20061
20062 // Bytes vector add
20063 instruct vaddB(vec dst, vec src) %{
20064 predicate(UseAVX == 0);
20065 match(Set dst (AddVB dst src));
20066 format %{ "paddb $dst,$src\t! add packedB" %}
20067 ins_encode %{
20068 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20069 %}
20070 ins_pipe( pipe_slow );
20071 %}
20072
20073 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20074 predicate(UseAVX > 0);
20075 match(Set dst (AddVB src1 src2));
20076 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
20077 ins_encode %{
20078 int vlen_enc = vector_length_encoding(this);
20079 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20080 %}
20081 ins_pipe( pipe_slow );
20082 %}
20083
20084 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20085 predicate((UseAVX > 0) &&
20086 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20087 match(Set dst (AddVB src (LoadVector mem)));
20088 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
20089 ins_encode %{
20090 int vlen_enc = vector_length_encoding(this);
20091 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20092 %}
20093 ins_pipe( pipe_slow );
20094 %}
20095
20096 // Shorts/Chars vector add
20097 instruct vaddS(vec dst, vec src) %{
20098 predicate(UseAVX == 0);
20099 match(Set dst (AddVS dst src));
20100 format %{ "paddw $dst,$src\t! add packedS" %}
20101 ins_encode %{
20102 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20103 %}
20104 ins_pipe( pipe_slow );
20105 %}
20106
20107 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20108 predicate(UseAVX > 0);
20109 match(Set dst (AddVS src1 src2));
20110 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
20111 ins_encode %{
20112 int vlen_enc = vector_length_encoding(this);
20113 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20114 %}
20115 ins_pipe( pipe_slow );
20116 %}
20117
20118 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20119 predicate((UseAVX > 0) &&
20120 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20121 match(Set dst (AddVS src (LoadVector mem)));
20122 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
20123 ins_encode %{
20124 int vlen_enc = vector_length_encoding(this);
20125 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20126 %}
20127 ins_pipe( pipe_slow );
20128 %}
20129
20130 // Integers vector add
20131 instruct vaddI(vec dst, vec src) %{
20132 predicate(UseAVX == 0);
20133 match(Set dst (AddVI dst src));
20134 format %{ "paddd $dst,$src\t! add packedI" %}
20135 ins_encode %{
20136 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20137 %}
20138 ins_pipe( pipe_slow );
20139 %}
20140
20141 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20142 predicate(UseAVX > 0);
20143 match(Set dst (AddVI src1 src2));
20144 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
20145 ins_encode %{
20146 int vlen_enc = vector_length_encoding(this);
20147 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20148 %}
20149 ins_pipe( pipe_slow );
20150 %}
20151
20152
20153 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20154 predicate((UseAVX > 0) &&
20155 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20156 match(Set dst (AddVI src (LoadVector mem)));
20157 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
20158 ins_encode %{
20159 int vlen_enc = vector_length_encoding(this);
20160 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20161 %}
20162 ins_pipe( pipe_slow );
20163 %}
20164
20165 // Longs vector add
20166 instruct vaddL(vec dst, vec src) %{
20167 predicate(UseAVX == 0);
20168 match(Set dst (AddVL dst src));
20169 format %{ "paddq $dst,$src\t! add packedL" %}
20170 ins_encode %{
20171 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20172 %}
20173 ins_pipe( pipe_slow );
20174 %}
20175
20176 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20177 predicate(UseAVX > 0);
20178 match(Set dst (AddVL src1 src2));
20179 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
20180 ins_encode %{
20181 int vlen_enc = vector_length_encoding(this);
20182 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20183 %}
20184 ins_pipe( pipe_slow );
20185 %}
20186
20187 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20188 predicate((UseAVX > 0) &&
20189 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20190 match(Set dst (AddVL src (LoadVector mem)));
20191 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
20192 ins_encode %{
20193 int vlen_enc = vector_length_encoding(this);
20194 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20195 %}
20196 ins_pipe( pipe_slow );
20197 %}
20198
20199 // Floats vector add
20200 instruct vaddF(vec dst, vec src) %{
20201 predicate(UseAVX == 0);
20202 match(Set dst (AddVF dst src));
20203 format %{ "addps $dst,$src\t! add packedF" %}
20204 ins_encode %{
20205 __ addps($dst$$XMMRegister, $src$$XMMRegister);
20206 %}
20207 ins_pipe( pipe_slow );
20208 %}
20209
20210 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20211 predicate(UseAVX > 0);
20212 match(Set dst (AddVF src1 src2));
20213 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
20214 ins_encode %{
20215 int vlen_enc = vector_length_encoding(this);
20216 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20217 %}
20218 ins_pipe( pipe_slow );
20219 %}
20220
20221 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20222 predicate((UseAVX > 0) &&
20223 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20224 match(Set dst (AddVF src (LoadVector mem)));
20225 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
20226 ins_encode %{
20227 int vlen_enc = vector_length_encoding(this);
20228 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20229 %}
20230 ins_pipe( pipe_slow );
20231 %}
20232
20233 // Doubles vector add
20234 instruct vaddD(vec dst, vec src) %{
20235 predicate(UseAVX == 0);
20236 match(Set dst (AddVD dst src));
20237 format %{ "addpd $dst,$src\t! add packedD" %}
20238 ins_encode %{
20239 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20240 %}
20241 ins_pipe( pipe_slow );
20242 %}
20243
20244 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20245 predicate(UseAVX > 0);
20246 match(Set dst (AddVD src1 src2));
20247 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
20248 ins_encode %{
20249 int vlen_enc = vector_length_encoding(this);
20250 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20251 %}
20252 ins_pipe( pipe_slow );
20253 %}
20254
20255 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20256 predicate((UseAVX > 0) &&
20257 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20258 match(Set dst (AddVD src (LoadVector mem)));
20259 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
20260 ins_encode %{
20261 int vlen_enc = vector_length_encoding(this);
20262 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20263 %}
20264 ins_pipe( pipe_slow );
20265 %}
20266
20267 // --------------------------------- SUB --------------------------------------
20268
20269 // Bytes vector sub
20270 instruct vsubB(vec dst, vec src) %{
20271 predicate(UseAVX == 0);
20272 match(Set dst (SubVB dst src));
20273 format %{ "psubb $dst,$src\t! sub packedB" %}
20274 ins_encode %{
20275 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20276 %}
20277 ins_pipe( pipe_slow );
20278 %}
20279
20280 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20281 predicate(UseAVX > 0);
20282 match(Set dst (SubVB src1 src2));
20283 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20284 ins_encode %{
20285 int vlen_enc = vector_length_encoding(this);
20286 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20287 %}
20288 ins_pipe( pipe_slow );
20289 %}
20290
20291 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20292 predicate((UseAVX > 0) &&
20293 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20294 match(Set dst (SubVB src (LoadVector mem)));
20295 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20296 ins_encode %{
20297 int vlen_enc = vector_length_encoding(this);
20298 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20299 %}
20300 ins_pipe( pipe_slow );
20301 %}
20302
20303 // Shorts/Chars vector sub
20304 instruct vsubS(vec dst, vec src) %{
20305 predicate(UseAVX == 0);
20306 match(Set dst (SubVS dst src));
20307 format %{ "psubw $dst,$src\t! sub packedS" %}
20308 ins_encode %{
20309 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20310 %}
20311 ins_pipe( pipe_slow );
20312 %}
20313
20314
20315 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20316 predicate(UseAVX > 0);
20317 match(Set dst (SubVS src1 src2));
20318 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20319 ins_encode %{
20320 int vlen_enc = vector_length_encoding(this);
20321 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20322 %}
20323 ins_pipe( pipe_slow );
20324 %}
20325
20326 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20327 predicate((UseAVX > 0) &&
20328 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20329 match(Set dst (SubVS src (LoadVector mem)));
20330 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20331 ins_encode %{
20332 int vlen_enc = vector_length_encoding(this);
20333 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20334 %}
20335 ins_pipe( pipe_slow );
20336 %}
20337
20338 // Integers vector sub
20339 instruct vsubI(vec dst, vec src) %{
20340 predicate(UseAVX == 0);
20341 match(Set dst (SubVI dst src));
20342 format %{ "psubd $dst,$src\t! sub packedI" %}
20343 ins_encode %{
20344 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20345 %}
20346 ins_pipe( pipe_slow );
20347 %}
20348
20349 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20350 predicate(UseAVX > 0);
20351 match(Set dst (SubVI src1 src2));
20352 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20353 ins_encode %{
20354 int vlen_enc = vector_length_encoding(this);
20355 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20356 %}
20357 ins_pipe( pipe_slow );
20358 %}
20359
20360 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20361 predicate((UseAVX > 0) &&
20362 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20363 match(Set dst (SubVI src (LoadVector mem)));
20364 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20365 ins_encode %{
20366 int vlen_enc = vector_length_encoding(this);
20367 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20368 %}
20369 ins_pipe( pipe_slow );
20370 %}
20371
20372 // Longs vector sub
20373 instruct vsubL(vec dst, vec src) %{
20374 predicate(UseAVX == 0);
20375 match(Set dst (SubVL dst src));
20376 format %{ "psubq $dst,$src\t! sub packedL" %}
20377 ins_encode %{
20378 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20379 %}
20380 ins_pipe( pipe_slow );
20381 %}
20382
20383 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20384 predicate(UseAVX > 0);
20385 match(Set dst (SubVL src1 src2));
20386 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20387 ins_encode %{
20388 int vlen_enc = vector_length_encoding(this);
20389 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20390 %}
20391 ins_pipe( pipe_slow );
20392 %}
20393
20394
20395 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20396 predicate((UseAVX > 0) &&
20397 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20398 match(Set dst (SubVL src (LoadVector mem)));
20399 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20400 ins_encode %{
20401 int vlen_enc = vector_length_encoding(this);
20402 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20403 %}
20404 ins_pipe( pipe_slow );
20405 %}
20406
20407 // Floats vector sub
20408 instruct vsubF(vec dst, vec src) %{
20409 predicate(UseAVX == 0);
20410 match(Set dst (SubVF dst src));
20411 format %{ "subps $dst,$src\t! sub packedF" %}
20412 ins_encode %{
20413 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20414 %}
20415 ins_pipe( pipe_slow );
20416 %}
20417
20418 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20419 predicate(UseAVX > 0);
20420 match(Set dst (SubVF src1 src2));
20421 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20422 ins_encode %{
20423 int vlen_enc = vector_length_encoding(this);
20424 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20425 %}
20426 ins_pipe( pipe_slow );
20427 %}
20428
20429 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20430 predicate((UseAVX > 0) &&
20431 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20432 match(Set dst (SubVF src (LoadVector mem)));
20433 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20434 ins_encode %{
20435 int vlen_enc = vector_length_encoding(this);
20436 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20437 %}
20438 ins_pipe( pipe_slow );
20439 %}
20440
20441 // Doubles vector sub
20442 instruct vsubD(vec dst, vec src) %{
20443 predicate(UseAVX == 0);
20444 match(Set dst (SubVD dst src));
20445 format %{ "subpd $dst,$src\t! sub packedD" %}
20446 ins_encode %{
20447 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20448 %}
20449 ins_pipe( pipe_slow );
20450 %}
20451
20452 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20453 predicate(UseAVX > 0);
20454 match(Set dst (SubVD src1 src2));
20455 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20456 ins_encode %{
20457 int vlen_enc = vector_length_encoding(this);
20458 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20459 %}
20460 ins_pipe( pipe_slow );
20461 %}
20462
20463 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20464 predicate((UseAVX > 0) &&
20465 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20466 match(Set dst (SubVD src (LoadVector mem)));
20467 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20468 ins_encode %{
20469 int vlen_enc = vector_length_encoding(this);
20470 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20471 %}
20472 ins_pipe( pipe_slow );
20473 %}
20474
20475 // --------------------------------- MUL --------------------------------------
20476
20477 // Byte vector mul
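// Note: x86 has no packed byte multiply instruction, so MulVB is synthesized from
// 16-bit multiplies below: bytes are widened (or isolated) into word lanes,
// multiplied with (v)pmullw, the high byte of each word is cleared, and the
// results are re-packed with (v)packuswb.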
20478 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20479 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20480 match(Set dst (MulVB src1 src2));
20481 effect(TEMP dst, TEMP xtmp);
20482 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20483 ins_encode %{
20484 assert(UseSSE > 3, "required");
20485 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20486 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20487 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20488 __ psllw($dst$$XMMRegister, 8);
20489 __ psrlw($dst$$XMMRegister, 8);
20490 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20491 %}
20492 ins_pipe( pipe_slow );
20493 %}
20494
20495 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20496 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20497 match(Set dst (MulVB src1 src2));
20498 effect(TEMP dst, TEMP xtmp);
20499 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20500 ins_encode %{
20501 assert(UseSSE > 3, "required");
20502 // Odd-index elements
20503 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20504 __ psrlw($dst$$XMMRegister, 8);
20505 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20506 __ psrlw($xtmp$$XMMRegister, 8);
20507 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20508 __ psllw($dst$$XMMRegister, 8);
20509 // Even-index elements
20510 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20511 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20512 __ psllw($xtmp$$XMMRegister, 8);
20513 __ psrlw($xtmp$$XMMRegister, 8);
20514 // Combine
20515 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20516 %}
20517 ins_pipe( pipe_slow );
20518 %}
20519
20520 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20521 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20522 match(Set dst (MulVB src1 src2));
20523 effect(TEMP xtmp1, TEMP xtmp2);
20524 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20525 ins_encode %{
20526 int vlen_enc = vector_length_encoding(this);
20527 // Odd-index elements
20528 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20529 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20530 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20531 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20532 // Even-index elements
20533 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20534 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20535 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20536 // Combine
20537 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20538 %}
20539 ins_pipe( pipe_slow );
20540 %}
20541
20542 // Shorts/Chars vector mul
20543 instruct vmulS(vec dst, vec src) %{
20544 predicate(UseAVX == 0);
20545 match(Set dst (MulVS dst src));
20546 format %{ "pmullw $dst,$src\t! mul packedS" %}
20547 ins_encode %{
20548 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20549 %}
20550 ins_pipe( pipe_slow );
20551 %}
20552
20553 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20554 predicate(UseAVX > 0);
20555 match(Set dst (MulVS src1 src2));
20556 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20557 ins_encode %{
20558 int vlen_enc = vector_length_encoding(this);
20559 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20560 %}
20561 ins_pipe( pipe_slow );
20562 %}
20563
20564 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20565 predicate((UseAVX > 0) &&
20566 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20567 match(Set dst (MulVS src (LoadVector mem)));
20568 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20569 ins_encode %{
20570 int vlen_enc = vector_length_encoding(this);
20571 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20572 %}
20573 ins_pipe( pipe_slow );
20574 %}
20575
20576 // Integers vector mul
20577 instruct vmulI(vec dst, vec src) %{
20578 predicate(UseAVX == 0);
20579 match(Set dst (MulVI dst src));
20580 format %{ "pmulld $dst,$src\t! mul packedI" %}
20581 ins_encode %{
20582 assert(UseSSE > 3, "required");
20583 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20584 %}
20585 ins_pipe( pipe_slow );
20586 %}
20587
20588 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20589 predicate(UseAVX > 0);
20590 match(Set dst (MulVI src1 src2));
20591 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20592 ins_encode %{
20593 int vlen_enc = vector_length_encoding(this);
20594 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20595 %}
20596 ins_pipe( pipe_slow );
20597 %}
20598
20599 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20600 predicate((UseAVX > 0) &&
20601 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20602 match(Set dst (MulVI src (LoadVector mem)));
20603 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20604 ins_encode %{
20605 int vlen_enc = vector_length_encoding(this);
20606 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20607 %}
20608 ins_pipe( pipe_slow );
20609 %}
20610
20611 // Longs vector mul
20612 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20613 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20614 VM_Version::supports_avx512dq()) ||
20615 VM_Version::supports_avx512vldq());
20616 match(Set dst (MulVL src1 src2));
20617 ins_cost(500);
20618 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20619 ins_encode %{
20620 assert(UseAVX > 2, "required");
20621 int vlen_enc = vector_length_encoding(this);
20622 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20623 %}
20624 ins_pipe( pipe_slow );
20625 %}
20626
20627 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20628 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20629 VM_Version::supports_avx512dq()) ||
20630 (Matcher::vector_length_in_bytes(n) > 8 &&
20631 VM_Version::supports_avx512vldq()));
20632 match(Set dst (MulVL src (LoadVector mem)));
20633 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20634 ins_cost(500);
20635 ins_encode %{
20636 assert(UseAVX > 2, "required");
20637 int vlen_enc = vector_length_encoding(this);
20638 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20639 %}
20640 ins_pipe( pipe_slow );
20641 %}
20642
20643 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20644 predicate(UseAVX == 0);
20645 match(Set dst (MulVL src1 src2));
20646 ins_cost(500);
20647 effect(TEMP dst, TEMP xtmp);
20648 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20649 ins_encode %{
20650 assert(VM_Version::supports_sse4_1(), "required");
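    // 64x64-bit multiply built from 32-bit pieces: with a = a_hi:a_lo and
    // b = b_hi:b_lo, a*b mod 2^64 = ((a_hi*b_lo + a_lo*b_hi) << 32) + a_lo*b_lo.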
20651     // Get the lo*hi cross products; only their lower 32 bits are of concern
20652 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20653 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20654 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20655 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20656 __ psllq($dst$$XMMRegister, 32);
20657 // Get the lo-lo products
20658 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20659 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20660 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20661 %}
20662 ins_pipe( pipe_slow );
20663 %}
20664
20665 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20666 predicate(UseAVX > 0 &&
20667 ((Matcher::vector_length_in_bytes(n) == 64 &&
20668 !VM_Version::supports_avx512dq()) ||
20669 (Matcher::vector_length_in_bytes(n) < 64 &&
20670 !VM_Version::supports_avx512vldq())));
20671 match(Set dst (MulVL src1 src2));
20672 effect(TEMP xtmp1, TEMP xtmp2);
20673 ins_cost(500);
20674 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20675 ins_encode %{
20676 int vlen_enc = vector_length_encoding(this);
20677     // Get the lo*hi cross products; only their lower 32 bits are of concern
20678 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20679 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20680 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20681 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20682 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20683 // Get the lo-lo products
20684 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20685 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20686 %}
20687 ins_pipe( pipe_slow );
20688 %}
20689
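// Note: when both 64-bit inputs are known to be zero-extended (unsigned) or
// sign-extended (signed) 32-bit values, the cross products above vanish, so a
// single vpmuludq/vpmuldq yields the full 64-bit product; hence the lower cost.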
20690 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20691 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20692 match(Set dst (MulVL src1 src2));
20693 ins_cost(100);
20694 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20695 ins_encode %{
20696 int vlen_enc = vector_length_encoding(this);
20697 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20698 %}
20699 ins_pipe( pipe_slow );
20700 %}
20701
20702 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20703 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20704 match(Set dst (MulVL src1 src2));
20705 ins_cost(100);
20706 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20707 ins_encode %{
20708 int vlen_enc = vector_length_encoding(this);
20709 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20710 %}
20711 ins_pipe( pipe_slow );
20712 %}
20713
20714 // Floats vector mul
20715 instruct vmulF(vec dst, vec src) %{
20716 predicate(UseAVX == 0);
20717 match(Set dst (MulVF dst src));
20718 format %{ "mulps $dst,$src\t! mul packedF" %}
20719 ins_encode %{
20720 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20721 %}
20722 ins_pipe( pipe_slow );
20723 %}
20724
20725 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20726 predicate(UseAVX > 0);
20727 match(Set dst (MulVF src1 src2));
20728 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
20729 ins_encode %{
20730 int vlen_enc = vector_length_encoding(this);
20731 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20732 %}
20733 ins_pipe( pipe_slow );
20734 %}
20735
20736 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20737 predicate((UseAVX > 0) &&
20738 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20739 match(Set dst (MulVF src (LoadVector mem)));
20740 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
20741 ins_encode %{
20742 int vlen_enc = vector_length_encoding(this);
20743 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20744 %}
20745 ins_pipe( pipe_slow );
20746 %}
20747
20748 // Doubles vector mul
20749 instruct vmulD(vec dst, vec src) %{
20750 predicate(UseAVX == 0);
20751 match(Set dst (MulVD dst src));
20752 format %{ "mulpd $dst,$src\t! mul packedD" %}
20753 ins_encode %{
20754 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20755 %}
20756 ins_pipe( pipe_slow );
20757 %}
20758
20759 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20760 predicate(UseAVX > 0);
20761 match(Set dst (MulVD src1 src2));
20762 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
20763 ins_encode %{
20764 int vlen_enc = vector_length_encoding(this);
20765 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20766 %}
20767 ins_pipe( pipe_slow );
20768 %}
20769
20770 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20771 predicate((UseAVX > 0) &&
20772 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20773 match(Set dst (MulVD src (LoadVector mem)));
20774 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
20775 ins_encode %{
20776 int vlen_enc = vector_length_encoding(this);
20777 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20778 %}
20779 ins_pipe( pipe_slow );
20780 %}
20781
20782 // --------------------------------- DIV --------------------------------------
20783
20784 // Floats vector div
20785 instruct vdivF(vec dst, vec src) %{
20786 predicate(UseAVX == 0);
20787 match(Set dst (DivVF dst src));
20788 format %{ "divps $dst,$src\t! div packedF" %}
20789 ins_encode %{
20790 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20791 %}
20792 ins_pipe( pipe_slow );
20793 %}
20794
20795 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20796 predicate(UseAVX > 0);
20797 match(Set dst (DivVF src1 src2));
20798 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20799 ins_encode %{
20800 int vlen_enc = vector_length_encoding(this);
20801 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20802 %}
20803 ins_pipe( pipe_slow );
20804 %}
20805
20806 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20807 predicate((UseAVX > 0) &&
20808 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20809 match(Set dst (DivVF src (LoadVector mem)));
20810 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20811 ins_encode %{
20812 int vlen_enc = vector_length_encoding(this);
20813 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20814 %}
20815 ins_pipe( pipe_slow );
20816 %}
20817
20818 // Doubles vector div
20819 instruct vdivD(vec dst, vec src) %{
20820 predicate(UseAVX == 0);
20821 match(Set dst (DivVD dst src));
20822 format %{ "divpd $dst,$src\t! div packedD" %}
20823 ins_encode %{
20824 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20825 %}
20826 ins_pipe( pipe_slow );
20827 %}
20828
20829 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20830 predicate(UseAVX > 0);
20831 match(Set dst (DivVD src1 src2));
20832 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20833 ins_encode %{
20834 int vlen_enc = vector_length_encoding(this);
20835 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20836 %}
20837 ins_pipe( pipe_slow );
20838 %}
20839
20840 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20841 predicate((UseAVX > 0) &&
20842 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20843 match(Set dst (DivVD src (LoadVector mem)));
20844 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20845 ins_encode %{
20846 int vlen_enc = vector_length_encoding(this);
20847 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20848 %}
20849 ins_pipe( pipe_slow );
20850 %}
20851
20852 // ------------------------------ MinMax ---------------------------------------
20853
20854 // Byte, Short, Int vector Min/Max
20855 instruct minmax_reg_sse(vec dst, vec src) %{
20856 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20857 UseAVX == 0);
20858 match(Set dst (MinV dst src));
20859 match(Set dst (MaxV dst src));
20860 format %{ "vector_minmax $dst,$src\t! " %}
20861 ins_encode %{
20862 assert(UseSSE >= 4, "required");
20863
20864 int opcode = this->ideal_Opcode();
20865 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20866 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20867 %}
20868 ins_pipe( pipe_slow );
20869 %}
20870
20871 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20872 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20873 UseAVX > 0);
20874 match(Set dst (MinV src1 src2));
20875 match(Set dst (MaxV src1 src2));
20876 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
20877 ins_encode %{
20878 int opcode = this->ideal_Opcode();
20879 int vlen_enc = vector_length_encoding(this);
20880 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20881
20882 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20883 %}
20884 ins_pipe( pipe_slow );
20885 %}
20886
20887 // Long vector Min/Max
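// Note: the SSE flavor below reserves xmm0 as its temp because the SSE4.1
// variable blend instructions (pblendvb/blendvpd) read their selector mask
// implicitly from xmm0.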
20888 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20889 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20890 UseAVX == 0);
20891 match(Set dst (MinV dst src));
20892 match(Set dst (MaxV src dst));
20893 effect(TEMP dst, TEMP tmp);
20894 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
20895 ins_encode %{
20896 assert(UseSSE >= 4, "required");
20897
20898 int opcode = this->ideal_Opcode();
20899 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20900 assert(elem_bt == T_LONG, "sanity");
20901
20902 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20903 %}
20904 ins_pipe( pipe_slow );
20905 %}
20906
20907 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20908 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20909 UseAVX > 0 && !VM_Version::supports_avx512vl());
20910 match(Set dst (MinV src1 src2));
20911 match(Set dst (MaxV src1 src2));
20912 effect(TEMP dst);
20913 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20914 ins_encode %{
20915 int vlen_enc = vector_length_encoding(this);
20916 int opcode = this->ideal_Opcode();
20917 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20918 assert(elem_bt == T_LONG, "sanity");
20919
20920 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20921 %}
20922 ins_pipe( pipe_slow );
20923 %}
20924
20925 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20926 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20927 Matcher::vector_element_basic_type(n) == T_LONG);
20928 match(Set dst (MinV src1 src2));
20929 match(Set dst (MaxV src1 src2));
20930   format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20931 ins_encode %{
20932 assert(UseAVX > 2, "required");
20933
20934 int vlen_enc = vector_length_encoding(this);
20935 int opcode = this->ideal_Opcode();
20936 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20937 assert(elem_bt == T_LONG, "sanity");
20938
20939 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20940 %}
20941 ins_pipe( pipe_slow );
20942 %}
20943
20944 // Float/Double vector Min/Max
20945 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20946 predicate(VM_Version::supports_avx10_2() &&
20947 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20948 match(Set dst (MinV a b));
20949 match(Set dst (MaxV a b));
20950 format %{ "vector_minmaxFP $dst, $a, $b" %}
20951 ins_encode %{
20952 int vlen_enc = vector_length_encoding(this);
20953 int opcode = this->ideal_Opcode();
20954 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20955 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20956 %}
20957 ins_pipe( pipe_slow );
20958 %}
20959
20960 // Float/Double vector Min/Max
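// Note: legacy (v)minps/pd and (v)maxps/pd do not match Java semantics: they
// return the second operand when an input is NaN and treat -0.0 as equal to +0.0.
// The patterns below therefore blend with extra temporaries (or an opmask on EVEX)
// to get NaN propagation and the -0.0 < +0.0 ordering Java requires.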
20961 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20962 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20963 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20964 UseAVX > 0);
20965 match(Set dst (MinV a b));
20966 match(Set dst (MaxV a b));
20967 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20968 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20969 ins_encode %{
20970 assert(UseAVX > 0, "required");
20971
20972 int opcode = this->ideal_Opcode();
20973 int vlen_enc = vector_length_encoding(this);
20974 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20975
20976 __ vminmax_fp(opcode, elem_bt,
20977 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20978 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20979 %}
20980 ins_pipe( pipe_slow );
20981 %}
20982
20983 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20984 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20985 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20986 match(Set dst (MinV a b));
20987 match(Set dst (MaxV a b));
20988 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20989 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20990 ins_encode %{
20991 assert(UseAVX > 2, "required");
20992
20993 int opcode = this->ideal_Opcode();
20994 int vlen_enc = vector_length_encoding(this);
20995 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20996
20997 __ evminmax_fp(opcode, elem_bt,
20998 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20999 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21000 %}
21001 ins_pipe( pipe_slow );
21002 %}
21003
21004 // ------------------------------ Unsigned vector Min/Max ----------------------
21005
21006 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21007 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21008 match(Set dst (UMinV a b));
21009 match(Set dst (UMaxV a b));
21010 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21011 ins_encode %{
21012 int opcode = this->ideal_Opcode();
21013 int vlen_enc = vector_length_encoding(this);
21014 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21015 assert(is_integral_type(elem_bt), "");
21016 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21017 %}
21018 ins_pipe( pipe_slow );
21019 %}
21020
21021 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21022 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21023 match(Set dst (UMinV a (LoadVector b)));
21024 match(Set dst (UMaxV a (LoadVector b)));
21025 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21026 ins_encode %{
21027 int opcode = this->ideal_Opcode();
21028 int vlen_enc = vector_length_encoding(this);
21029 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21030 assert(is_integral_type(elem_bt), "");
21031 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21032 %}
21033 ins_pipe( pipe_slow );
21034 %}
21035
21036 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21037 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21038 match(Set dst (UMinV a b));
21039 match(Set dst (UMaxV a b));
21040 effect(TEMP xtmp1, TEMP xtmp2);
21041   format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
21042 ins_encode %{
21043 int opcode = this->ideal_Opcode();
21044 int vlen_enc = vector_length_encoding(this);
21045 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21046 %}
21047 ins_pipe( pipe_slow );
21048 %}
21049
21050 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21051 match(Set dst (UMinV (Binary dst src2) mask));
21052 match(Set dst (UMaxV (Binary dst src2) mask));
21053 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21054 ins_encode %{
21055 int vlen_enc = vector_length_encoding(this);
21056 BasicType bt = Matcher::vector_element_basic_type(this);
21057 int opc = this->ideal_Opcode();
21058 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21059 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21060 %}
21061 ins_pipe( pipe_slow );
21062 %}
21063
21064 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21065 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21066 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21067 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21068 ins_encode %{
21069 int vlen_enc = vector_length_encoding(this);
21070 BasicType bt = Matcher::vector_element_basic_type(this);
21071 int opc = this->ideal_Opcode();
21072 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21073 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21074 %}
21075 ins_pipe( pipe_slow );
21076 %}
21077
21078 // --------------------------------- Signum/CopySign ---------------------------
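// Math.signum semantics: NaN stays NaN, ±0.0 is returned unchanged, and any other
// value becomes ±1.0 carrying the input's sign; the zero and one operands below
// supply those constants.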
21079
21080 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21081 match(Set dst (SignumF dst (Binary zero one)));
21082 effect(KILL cr);
21083 format %{ "signumF $dst, $dst" %}
21084 ins_encode %{
21085 int opcode = this->ideal_Opcode();
21086 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21087 %}
21088 ins_pipe( pipe_slow );
21089 %}
21090
21091 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21092 match(Set dst (SignumD dst (Binary zero one)));
21093 effect(KILL cr);
21094 format %{ "signumD $dst, $dst" %}
21095 ins_encode %{
21096 int opcode = this->ideal_Opcode();
21097 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21098 %}
21099 ins_pipe( pipe_slow );
21100 %}
21101
21102 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21103 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21104 match(Set dst (SignumVF src (Binary zero one)));
21105 match(Set dst (SignumVD src (Binary zero one)));
21106 effect(TEMP dst, TEMP xtmp1);
21107 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21108 ins_encode %{
21109 int opcode = this->ideal_Opcode();
21110 int vec_enc = vector_length_encoding(this);
21111 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21112 $xtmp1$$XMMRegister, vec_enc);
21113 %}
21114 ins_pipe( pipe_slow );
21115 %}
21116
21117 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21118 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21119 match(Set dst (SignumVF src (Binary zero one)));
21120 match(Set dst (SignumVD src (Binary zero one)));
21121 effect(TEMP dst, TEMP ktmp1);
21122 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21123 ins_encode %{
21124 int opcode = this->ideal_Opcode();
21125 int vec_enc = vector_length_encoding(this);
21126 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21127 $ktmp1$$KRegister, vec_enc);
21128 %}
21129 ins_pipe( pipe_slow );
21130 %}
21131
21132 // ---------------------------------------
21133 // For copySign use 0xE4 as the immediate (truth-table selector) for vpternlog
21134 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21135 // C (xmm2) is set to 0x7FFFFFFF
21136 // Wherever xmm2 is 0, we want to pick from B (sign)
21137 // Wherever xmm2 is 1, we want to pick from A (src)
21138 //
21139 // A B C Result
21140 // 0 0 0 0
21141 // 0 0 1 0
21142 // 0 1 0 1
21143 // 0 1 1 0
21144 // 1 0 0 0
21145 // 1 0 1 1
21146 // 1 1 0 1
21147 // 1 1 1 1
21148 //
21149 // Result, read from the A=1,B=1,C=1 row down to the A=0,B=0,C=0 row, is binary 11100100 = 0xE4
21150 // ---------------------------------------
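// Illustrative example (single precision): dst = 1.5f (0x3FC00000), src = -2.0f
// (0xC0000000), C = 0x7FFFFFFF. Bit 31 has C = 0, so it is taken from B (the sign
// of src); bits 30..0 have C = 1, so they are taken from A (the magnitude of dst).
// Result: 0xBFC00000 = -1.5f, i.e. dst's magnitude with src's sign.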
21151
21152 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21153 match(Set dst (CopySignF dst src));
21154 effect(TEMP tmp1, TEMP tmp2);
21155 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21156 ins_encode %{
21157 __ movl($tmp2$$Register, 0x7FFFFFFF);
21158 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21159 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21160 %}
21161 ins_pipe( pipe_slow );
21162 %}
21163
21164 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21165 match(Set dst (CopySignD dst (Binary src zero)));
21166 ins_cost(100);
21167 effect(TEMP tmp1, TEMP tmp2);
21168 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21169 ins_encode %{
21170 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21171 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21172 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21173 %}
21174 ins_pipe( pipe_slow );
21175 %}
21176
21177 //----------------------------- CompressBits/ExpandBits ------------------------
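// BMI2 pext gathers the src bits selected by mask into contiguous low-order result
// bits; pdep is the inverse, scattering low-order src bits to the bit positions
// selected by mask. For example, pext(src=0b101101, mask=0b001110) = 0b110 and
// pdep(src=0b110, mask=0b001110) = 0b001100.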
21178
21179 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21180 predicate(n->bottom_type()->isa_int());
21181 match(Set dst (CompressBits src mask));
21182 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21183 ins_encode %{
21184 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21185 %}
21186 ins_pipe( pipe_slow );
21187 %}
21188
21189 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21190 predicate(n->bottom_type()->isa_int());
21191 match(Set dst (ExpandBits src mask));
21192 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21193 ins_encode %{
21194 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21195 %}
21196 ins_pipe( pipe_slow );
21197 %}
21198
21199 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21200 predicate(n->bottom_type()->isa_int());
21201 match(Set dst (CompressBits src (LoadI mask)));
21202 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21203 ins_encode %{
21204 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21205 %}
21206 ins_pipe( pipe_slow );
21207 %}
21208
21209 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21210 predicate(n->bottom_type()->isa_int());
21211 match(Set dst (ExpandBits src (LoadI mask)));
21212 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21213 ins_encode %{
21214 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21215 %}
21216 ins_pipe( pipe_slow );
21217 %}
21218
21219 // --------------------------------- Sqrt --------------------------------------
21220
21221 instruct vsqrtF_reg(vec dst, vec src) %{
21222 match(Set dst (SqrtVF src));
21223 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
21224 ins_encode %{
21225 assert(UseAVX > 0, "required");
21226 int vlen_enc = vector_length_encoding(this);
21227 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21228 %}
21229 ins_pipe( pipe_slow );
21230 %}
21231
21232 instruct vsqrtF_mem(vec dst, memory mem) %{
21233 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21234 match(Set dst (SqrtVF (LoadVector mem)));
21235 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
21236 ins_encode %{
21237 assert(UseAVX > 0, "required");
21238 int vlen_enc = vector_length_encoding(this);
21239 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21240 %}
21241 ins_pipe( pipe_slow );
21242 %}
21243
21244 // Doubles vector sqrt
21245 instruct vsqrtD_reg(vec dst, vec src) %{
21246 match(Set dst (SqrtVD src));
21247 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
21248 ins_encode %{
21249 assert(UseAVX > 0, "required");
21250 int vlen_enc = vector_length_encoding(this);
21251 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21252 %}
21253 ins_pipe( pipe_slow );
21254 %}
21255
21256 instruct vsqrtD_mem(vec dst, memory mem) %{
21257 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21258 match(Set dst (SqrtVD (LoadVector mem)));
21259 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
21260 ins_encode %{
21261 assert(UseAVX > 0, "required");
21262 int vlen_enc = vector_length_encoding(this);
21263 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21264 %}
21265 ins_pipe( pipe_slow );
21266 %}
21267
21268 // ------------------------------ Shift ---------------------------------------
21269
21270 // Left and right shift count vectors are the same on x86
21271 // (only the low bits of the xmm register are used for the count).
21272 instruct vshiftcnt(vec dst, rRegI cnt) %{
21273 match(Set dst (LShiftCntV cnt));
21274 match(Set dst (RShiftCntV cnt));
21275 format %{ "movdl $dst,$cnt\t! load shift count" %}
21276 ins_encode %{
21277 __ movdl($dst$$XMMRegister, $cnt$$Register);
21278 %}
21279 ins_pipe( pipe_slow );
21280 %}
21281
21282 // Byte vector shift
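// Note: x86 has no packed byte shift instructions, so byte shifts are synthesized:
// bytes are sign/zero-extended into word lanes, shifted with the 16-bit shift,
// masked back to bytes, and re-packed with (v)packuswb.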
21283 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21284 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21285 match(Set dst ( LShiftVB src shift));
21286 match(Set dst ( RShiftVB src shift));
21287 match(Set dst (URShiftVB src shift));
21288 effect(TEMP dst, USE src, USE shift, TEMP tmp);
21289 format %{"vector_byte_shift $dst,$src,$shift" %}
21290 ins_encode %{
21291 assert(UseSSE > 3, "required");
21292 int opcode = this->ideal_Opcode();
21293 bool sign = (opcode != Op_URShiftVB);
21294 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21295 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21296 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21297 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21298 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21299 %}
21300 ins_pipe( pipe_slow );
21301 %}
21302
21303 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21304 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21305 UseAVX <= 1);
21306 match(Set dst ( LShiftVB src shift));
21307 match(Set dst ( RShiftVB src shift));
21308 match(Set dst (URShiftVB src shift));
21309 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21310 format %{"vector_byte_shift $dst,$src,$shift" %}
21311 ins_encode %{
21312 assert(UseSSE > 3, "required");
21313 int opcode = this->ideal_Opcode();
21314 bool sign = (opcode != Op_URShiftVB);
21315 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21316 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21317 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21318 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21319 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21320 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21321 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21322 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21323 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21324 %}
21325 ins_pipe( pipe_slow );
21326 %}
21327
21328 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21329 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21330 UseAVX > 1);
21331 match(Set dst ( LShiftVB src shift));
21332 match(Set dst ( RShiftVB src shift));
21333 match(Set dst (URShiftVB src shift));
21334 effect(TEMP dst, TEMP tmp);
21335 format %{"vector_byte_shift $dst,$src,$shift" %}
21336 ins_encode %{
21337 int opcode = this->ideal_Opcode();
21338 bool sign = (opcode != Op_URShiftVB);
21339 int vlen_enc = Assembler::AVX_256bit;
21340 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21341 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21342 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21343 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21344 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21345 %}
21346 ins_pipe( pipe_slow );
21347 %}
21348
21349 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21350 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21351 match(Set dst ( LShiftVB src shift));
21352 match(Set dst ( RShiftVB src shift));
21353 match(Set dst (URShiftVB src shift));
21354 effect(TEMP dst, TEMP tmp);
21355   format %{ "vector_byte_shift $dst,$src,$shift" %}
21356 ins_encode %{
21357 assert(UseAVX > 1, "required");
21358 int opcode = this->ideal_Opcode();
21359 bool sign = (opcode != Op_URShiftVB);
21360 int vlen_enc = Assembler::AVX_256bit;
21361 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21362 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21363 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21364 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21365 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21366 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21367 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21368 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
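    // vpackuswb interleaves its inputs per 128-bit lane, so the packed bytes come out in the order
    // 0-7, 16-23, 8-15, 24-31; vpermq with 0xD8 (qword order 0,2,1,3) restores the original order.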
21369 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21370 %}
21371 ins_pipe( pipe_slow );
21372 %}
21373
21374 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21375 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21376 match(Set dst ( LShiftVB src shift));
21377   match(Set dst ( RShiftVB src shift));
21378 match(Set dst (URShiftVB src shift));
21379 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21380   format %{ "vector_byte_shift $dst,$src,$shift" %}
21381 ins_encode %{
21382 assert(UseAVX > 2, "required");
21383 int opcode = this->ideal_Opcode();
21384 bool sign = (opcode != Op_URShiftVB);
21385 int vlen_enc = Assembler::AVX_512bit;
21386 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21387 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21388 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21389 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21390 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21391 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21392 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21393 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21394 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21395 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21396 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21397 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21398 %}
21399 ins_pipe( pipe_slow );
21400 %}
21401
21402 // Shorts vector logical right shift produces an incorrect Java result for negative data,
21403 // because Java code converts the short value into an int with sign extension before the shift.
21404 // Char vectors are fine, since chars are unsigned values.
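// For example, (short)0x8000 is widened to the int 0xFFFF8000; 0xFFFF8000 >>> 3 is 0x1FFFF000,
// whose low 16 bits (0xF000) differ from a plain 16-bit logical shift (0x8000 >>> 3 == 0x1000).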
21406 // Shorts/Chars vector shift
21407 instruct vshiftS(vec dst, vec src, vec shift) %{
21408 predicate(!n->as_ShiftV()->is_var_shift());
21409 match(Set dst ( LShiftVS src shift));
21410 match(Set dst ( RShiftVS src shift));
21411 match(Set dst (URShiftVS src shift));
21412 effect(TEMP dst, USE src, USE shift);
21413 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
21414 ins_encode %{
21415 int opcode = this->ideal_Opcode();
21416 if (UseAVX > 0) {
21417 int vlen_enc = vector_length_encoding(this);
21418 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21419 } else {
21420 int vlen = Matcher::vector_length(this);
21421 if (vlen == 2) {
21422 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21423 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21424 } else if (vlen == 4) {
21425 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21426 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21427 } else {
21428         assert(vlen == 8, "sanity");
21429 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21430 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21431 }
21432 }
21433 %}
21434 ins_pipe( pipe_slow );
21435 %}
21436
21437 // Integers vector shift
21438 instruct vshiftI(vec dst, vec src, vec shift) %{
21439 predicate(!n->as_ShiftV()->is_var_shift());
21440 match(Set dst ( LShiftVI src shift));
21441 match(Set dst ( RShiftVI src shift));
21442 match(Set dst (URShiftVI src shift));
21443 effect(TEMP dst, USE src, USE shift);
21444 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
21445 ins_encode %{
21446 int opcode = this->ideal_Opcode();
21447 if (UseAVX > 0) {
21448 int vlen_enc = vector_length_encoding(this);
21449 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21450 } else {
21451 int vlen = Matcher::vector_length(this);
21452 if (vlen == 2) {
21453 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21454 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21455 } else {
21456 assert(vlen == 4, "sanity");
21457 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21458 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21459 }
21460 }
21461 %}
21462 ins_pipe( pipe_slow );
21463 %}
21464
21465 // Integers vector constant shift
21466 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21467 match(Set dst (LShiftVI src (LShiftCntV shift)));
21468 match(Set dst (RShiftVI src (RShiftCntV shift)));
21469 match(Set dst (URShiftVI src (RShiftCntV shift)));
21470 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
21471 ins_encode %{
21472 int opcode = this->ideal_Opcode();
21473 if (UseAVX > 0) {
21474 int vector_len = vector_length_encoding(this);
21475 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21476 } else {
21477 int vlen = Matcher::vector_length(this);
21478 if (vlen == 2) {
21479 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21480 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21481 } else {
21482 assert(vlen == 4, "sanity");
21483 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21484 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21485 }
21486 }
21487 %}
21488 ins_pipe( pipe_slow );
21489 %}
21490
21491 // Longs vector shift
21492 instruct vshiftL(vec dst, vec src, vec shift) %{
21493 predicate(!n->as_ShiftV()->is_var_shift());
21494 match(Set dst ( LShiftVL src shift));
21495 match(Set dst (URShiftVL src shift));
21496 effect(TEMP dst, USE src, USE shift);
21497 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
21498 ins_encode %{
21499 int opcode = this->ideal_Opcode();
21500 if (UseAVX > 0) {
21501 int vlen_enc = vector_length_encoding(this);
21502 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21503 } else {
21504 assert(Matcher::vector_length(this) == 2, "");
21505 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21506 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21507 }
21508 %}
21509 ins_pipe( pipe_slow );
21510 %}
21511
21512 // Longs vector constant shift
21513 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21514 match(Set dst (LShiftVL src (LShiftCntV shift)));
21515 match(Set dst (URShiftVL src (RShiftCntV shift)));
21516 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
21517 ins_encode %{
21518 int opcode = this->ideal_Opcode();
21519 if (UseAVX > 0) {
21520 int vector_len = vector_length_encoding(this);
21521 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21522 } else {
21523 assert(Matcher::vector_length(this) == 2, "");
21524 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21525 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21526 }
21527 %}
21528 ins_pipe( pipe_slow );
21529 %}
21530
21531 // -------------------ArithmeticRightShift -----------------------------------
21532 // Long vector arithmetic right shift
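// SSE2/AVX2 have no packed 64-bit arithmetic right shift (vpsraq is EVEX-only), so the rule below
// emulates it: logically shift both the value and the sign mask (0x8000000000000000) by the same
// count, then xor and subtract the shifted mask to propagate the sign bits.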
21533 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21534 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21535 match(Set dst (RShiftVL src shift));
21536 effect(TEMP dst, TEMP tmp);
21537 format %{ "vshiftq $dst,$src,$shift" %}
21538 ins_encode %{
21539 uint vlen = Matcher::vector_length(this);
21540 if (vlen == 2) {
21541 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21542 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21543 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21544 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21545 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21546 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21547 } else {
21548 assert(vlen == 4, "sanity");
21549 assert(UseAVX > 1, "required");
21550 int vlen_enc = Assembler::AVX_256bit;
21551 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21552 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21553 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21554 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21555 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21556 }
21557 %}
21558 ins_pipe( pipe_slow );
21559 %}
21560
21561 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21562 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21563 match(Set dst (RShiftVL src shift));
21564 format %{ "vshiftq $dst,$src,$shift" %}
21565 ins_encode %{
21566 int vlen_enc = vector_length_encoding(this);
21567 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21568 %}
21569 ins_pipe( pipe_slow );
21570 %}
21571
21572 // ------------------- Variable Shift -----------------------------
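// Variable shifts take a per-element count. AVX2 only provides variable dword shifts
// (vpsllvd/vpsrlvd/vpsravd) and variable logical qword shifts; variable word shifts need AVX-512BW,
// and there is no variable byte shift at all, so byte (and, without AVX-512BW, short) elements are
// widened, shifted at the wider width, and narrowed back.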
21573 // Byte variable shift
21574 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21575 predicate(Matcher::vector_length(n) <= 8 &&
21576 n->as_ShiftV()->is_var_shift() &&
21577 !VM_Version::supports_avx512bw());
21578 match(Set dst ( LShiftVB src shift));
21579 match(Set dst ( RShiftVB src shift));
21580 match(Set dst (URShiftVB src shift));
21581 effect(TEMP dst, TEMP vtmp);
21582 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21583 ins_encode %{
21584 assert(UseAVX >= 2, "required");
21585
21586 int opcode = this->ideal_Opcode();
21587 int vlen_enc = Assembler::AVX_128bit;
21588 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21589 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21590 %}
21591 ins_pipe( pipe_slow );
21592 %}
21593
21594 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21595 predicate(Matcher::vector_length(n) == 16 &&
21596 n->as_ShiftV()->is_var_shift() &&
21597 !VM_Version::supports_avx512bw());
21598 match(Set dst ( LShiftVB src shift));
21599 match(Set dst ( RShiftVB src shift));
21600 match(Set dst (URShiftVB src shift));
21601 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21602 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21603 ins_encode %{
21604 assert(UseAVX >= 2, "required");
21605
21606 int opcode = this->ideal_Opcode();
21607 int vlen_enc = Assembler::AVX_128bit;
21608 // Shift lower half and get word result in dst
21609 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21610
21611 // Shift upper half and get word result in vtmp1
21612 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21613 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21614 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21615
21616 // Merge and down convert the two word results to byte in dst
21617 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21618 %}
21619 ins_pipe( pipe_slow );
21620 %}
21621
21622 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21623 predicate(Matcher::vector_length(n) == 32 &&
21624 n->as_ShiftV()->is_var_shift() &&
21625 !VM_Version::supports_avx512bw());
21626 match(Set dst ( LShiftVB src shift));
21627 match(Set dst ( RShiftVB src shift));
21628 match(Set dst (URShiftVB src shift));
21629 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21630   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21631 ins_encode %{
21632 assert(UseAVX >= 2, "required");
21633
21634 int opcode = this->ideal_Opcode();
21635 int vlen_enc = Assembler::AVX_128bit;
21636 // Process lower 128 bits and get result in dst
21637 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21638 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21639 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21640 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21641 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21642
21643 // Process higher 128 bits and get result in vtmp3
21644 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21645 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21646 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21647 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21648 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21649 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21650 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21651
21652 // Merge the two results in dst
21653 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21654 %}
21655 ins_pipe( pipe_slow );
21656 %}
21657
21658 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21659 predicate(Matcher::vector_length(n) <= 32 &&
21660 n->as_ShiftV()->is_var_shift() &&
21661 VM_Version::supports_avx512bw());
21662 match(Set dst ( LShiftVB src shift));
21663 match(Set dst ( RShiftVB src shift));
21664 match(Set dst (URShiftVB src shift));
21665 effect(TEMP dst, TEMP vtmp);
21666 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21667 ins_encode %{
21668 assert(UseAVX > 2, "required");
21669
21670 int opcode = this->ideal_Opcode();
21671 int vlen_enc = vector_length_encoding(this);
21672 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21673 %}
21674 ins_pipe( pipe_slow );
21675 %}
21676
21677 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21678 predicate(Matcher::vector_length(n) == 64 &&
21679 n->as_ShiftV()->is_var_shift() &&
21680 VM_Version::supports_avx512bw());
21681 match(Set dst ( LShiftVB src shift));
21682 match(Set dst ( RShiftVB src shift));
21683 match(Set dst (URShiftVB src shift));
21684 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21685 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21686 ins_encode %{
21687 assert(UseAVX > 2, "required");
21688
21689 int opcode = this->ideal_Opcode();
21690 int vlen_enc = Assembler::AVX_256bit;
21691 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21692 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21693 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21694 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21695 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21696 %}
21697 ins_pipe( pipe_slow );
21698 %}
21699
21700 // Short variable shift
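// Without AVX-512BW there is no variable word shift: extend the shorts to dwords, apply the
// variable dword shift, mask the results back to 16 bits and pack them into shorts.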
21701 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21702 predicate(Matcher::vector_length(n) <= 8 &&
21703 n->as_ShiftV()->is_var_shift() &&
21704 !VM_Version::supports_avx512bw());
21705 match(Set dst ( LShiftVS src shift));
21706 match(Set dst ( RShiftVS src shift));
21707 match(Set dst (URShiftVS src shift));
21708 effect(TEMP dst, TEMP vtmp);
21709   format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21710 ins_encode %{
21711 assert(UseAVX >= 2, "required");
21712
21713 int opcode = this->ideal_Opcode();
21714 bool sign = (opcode != Op_URShiftVS);
21715 int vlen_enc = Assembler::AVX_256bit;
21716     __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21717     __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21718 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21719 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21720 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21721 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21722 %}
21723 ins_pipe( pipe_slow );
21724 %}
21725
21726 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21727 predicate(Matcher::vector_length(n) == 16 &&
21728 n->as_ShiftV()->is_var_shift() &&
21729 !VM_Version::supports_avx512bw());
21730 match(Set dst ( LShiftVS src shift));
21731 match(Set dst ( RShiftVS src shift));
21732 match(Set dst (URShiftVS src shift));
21733 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21734   format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21735 ins_encode %{
21736 assert(UseAVX >= 2, "required");
21737
21738 int opcode = this->ideal_Opcode();
21739 bool sign = (opcode != Op_URShiftVS);
21740 int vlen_enc = Assembler::AVX_256bit;
21741 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21742 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21743 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21744 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21745 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21746
21747 // Shift upper half, with result in dst using vtmp1 as TEMP
21748 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21749 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21750 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21751 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21752 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21753 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21754
21755     // Merge lower and upper half results into dst
21756 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21757 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21758 %}
21759 ins_pipe( pipe_slow );
21760 %}
21761
21762 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21763 predicate(n->as_ShiftV()->is_var_shift() &&
21764 VM_Version::supports_avx512bw());
21765 match(Set dst ( LShiftVS src shift));
21766 match(Set dst ( RShiftVS src shift));
21767 match(Set dst (URShiftVS src shift));
21768 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21769 ins_encode %{
21770 assert(UseAVX > 2, "required");
21771
21772 int opcode = this->ideal_Opcode();
21773 int vlen_enc = vector_length_encoding(this);
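    // Without AVX512VL the EVEX instruction is only available at 512-bit vector length; the extra
    // upper lanes of the result are simply not used.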
21774 if (!VM_Version::supports_avx512vl()) {
21775 vlen_enc = Assembler::AVX_512bit;
21776 }
21777 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21778 %}
21779 ins_pipe( pipe_slow );
21780 %}
21781
21782 // Integer variable shift
21783 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21784 predicate(n->as_ShiftV()->is_var_shift());
21785 match(Set dst ( LShiftVI src shift));
21786 match(Set dst ( RShiftVI src shift));
21787 match(Set dst (URShiftVI src shift));
21788 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21789 ins_encode %{
21790 assert(UseAVX >= 2, "required");
21791
21792 int opcode = this->ideal_Opcode();
21793 int vlen_enc = vector_length_encoding(this);
21794 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21795 %}
21796 ins_pipe( pipe_slow );
21797 %}
21798
21799 // Long variable shift
21800 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21801 predicate(n->as_ShiftV()->is_var_shift());
21802 match(Set dst ( LShiftVL src shift));
21803 match(Set dst (URShiftVL src shift));
21804 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21805 ins_encode %{
21806 assert(UseAVX >= 2, "required");
21807
21808 int opcode = this->ideal_Opcode();
21809 int vlen_enc = vector_length_encoding(this);
21810 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21811 %}
21812 ins_pipe( pipe_slow );
21813 %}
21814
21815 // Long variable arithmetic right shift
21816 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21817 predicate(Matcher::vector_length(n) <= 4 &&
21818 n->as_ShiftV()->is_var_shift() &&
21819 UseAVX == 2);
21820 match(Set dst (RShiftVL src shift));
21821 effect(TEMP dst, TEMP vtmp);
21822 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21823 ins_encode %{
21824 int opcode = this->ideal_Opcode();
21825 int vlen_enc = vector_length_encoding(this);
21826 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21827 $vtmp$$XMMRegister);
21828 %}
21829 ins_pipe( pipe_slow );
21830 %}
21831
21832 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21833 predicate(n->as_ShiftV()->is_var_shift() &&
21834 UseAVX > 2);
21835 match(Set dst (RShiftVL src shift));
21836   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21837 ins_encode %{
21838 int opcode = this->ideal_Opcode();
21839 int vlen_enc = vector_length_encoding(this);
21840 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21841 %}
21842 ins_pipe( pipe_slow );
21843 %}
21844
21845 // --------------------------------- AND --------------------------------------
21846
21847 instruct vand(vec dst, vec src) %{
21848 predicate(UseAVX == 0);
21849 match(Set dst (AndV dst src));
21850 format %{ "pand $dst,$src\t! and vectors" %}
21851 ins_encode %{
21852 __ pand($dst$$XMMRegister, $src$$XMMRegister);
21853 %}
21854 ins_pipe( pipe_slow );
21855 %}
21856
21857 instruct vand_reg(vec dst, vec src1, vec src2) %{
21858 predicate(UseAVX > 0);
21859 match(Set dst (AndV src1 src2));
21860 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
21861 ins_encode %{
21862 int vlen_enc = vector_length_encoding(this);
21863 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21864 %}
21865 ins_pipe( pipe_slow );
21866 %}
21867
21868 instruct vand_mem(vec dst, vec src, memory mem) %{
21869 predicate((UseAVX > 0) &&
21870 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21871 match(Set dst (AndV src (LoadVector mem)));
21872 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
21873 ins_encode %{
21874 int vlen_enc = vector_length_encoding(this);
21875 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21876 %}
21877 ins_pipe( pipe_slow );
21878 %}
21879
21880 // --------------------------------- OR ---------------------------------------
21881
21882 instruct vor(vec dst, vec src) %{
21883 predicate(UseAVX == 0);
21884 match(Set dst (OrV dst src));
21885 format %{ "por $dst,$src\t! or vectors" %}
21886 ins_encode %{
21887 __ por($dst$$XMMRegister, $src$$XMMRegister);
21888 %}
21889 ins_pipe( pipe_slow );
21890 %}
21891
21892 instruct vor_reg(vec dst, vec src1, vec src2) %{
21893 predicate(UseAVX > 0);
21894 match(Set dst (OrV src1 src2));
21895 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
21896 ins_encode %{
21897 int vlen_enc = vector_length_encoding(this);
21898 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21899 %}
21900 ins_pipe( pipe_slow );
21901 %}
21902
21903 instruct vor_mem(vec dst, vec src, memory mem) %{
21904 predicate((UseAVX > 0) &&
21905 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21906 match(Set dst (OrV src (LoadVector mem)));
21907 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
21908 ins_encode %{
21909 int vlen_enc = vector_length_encoding(this);
21910 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21911 %}
21912 ins_pipe( pipe_slow );
21913 %}
21914
21915 // --------------------------------- XOR --------------------------------------
21916
21917 instruct vxor(vec dst, vec src) %{
21918 predicate(UseAVX == 0);
21919 match(Set dst (XorV dst src));
21920 format %{ "pxor $dst,$src\t! xor vectors" %}
21921 ins_encode %{
21922 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21923 %}
21924 ins_pipe( pipe_slow );
21925 %}
21926
21927 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21928 predicate(UseAVX > 0);
21929 match(Set dst (XorV src1 src2));
21930 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
21931 ins_encode %{
21932 int vlen_enc = vector_length_encoding(this);
21933 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21934 %}
21935 ins_pipe( pipe_slow );
21936 %}
21937
21938 instruct vxor_mem(vec dst, vec src, memory mem) %{
21939 predicate((UseAVX > 0) &&
21940 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21941 match(Set dst (XorV src (LoadVector mem)));
21942 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
21943 ins_encode %{
21944 int vlen_enc = vector_length_encoding(this);
21945 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21946 %}
21947 ins_pipe( pipe_slow );
21948 %}
21949
21950 // --------------------------------- VectorCast --------------------------------------
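// Widening casts sign-extend with vpmovsx* and, for FP targets, convert with vcvtdq2ps/vcvtdq2pd.
// Narrowing casts use the AVX-512 down-converting moves (evpmovdb, evpmovwb, ...) when available;
// the legacy AVX rules instead mask each element to the target width and pack.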
21951
21952 instruct vcastBtoX(vec dst, vec src) %{
21953 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21954 match(Set dst (VectorCastB2X src));
21955 format %{ "vector_cast_b2x $dst,$src\t!" %}
21956 ins_encode %{
21957 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21958 int vlen_enc = vector_length_encoding(this);
21959 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21960 %}
21961 ins_pipe( pipe_slow );
21962 %}
21963
21964 instruct vcastBtoD(legVec dst, legVec src) %{
21965 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21966 match(Set dst (VectorCastB2X src));
21967 format %{ "vector_cast_b2x $dst,$src\t!" %}
21968 ins_encode %{
21969 int vlen_enc = vector_length_encoding(this);
21970 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21971 %}
21972 ins_pipe( pipe_slow );
21973 %}
21974
21975 instruct castStoX(vec dst, vec src) %{
21976 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21977 Matcher::vector_length(n->in(1)) <= 8 && // src
21978 Matcher::vector_element_basic_type(n) == T_BYTE);
21979 match(Set dst (VectorCastS2X src));
21980 format %{ "vector_cast_s2x $dst,$src" %}
21981 ins_encode %{
21982 assert(UseAVX > 0, "required");
21983
21984 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21985 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21986 %}
21987 ins_pipe( pipe_slow );
21988 %}
21989
21990 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21991 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21992 Matcher::vector_length(n->in(1)) == 16 && // src
21993 Matcher::vector_element_basic_type(n) == T_BYTE);
21994 effect(TEMP dst, TEMP vtmp);
21995 match(Set dst (VectorCastS2X src));
21996 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21997 ins_encode %{
21998 assert(UseAVX > 0, "required");
21999
22000 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22001 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22002 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22003 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22004 %}
22005 ins_pipe( pipe_slow );
22006 %}
22007
22008 instruct vcastStoX_evex(vec dst, vec src) %{
22009 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22010 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22011 match(Set dst (VectorCastS2X src));
22012 format %{ "vector_cast_s2x $dst,$src\t!" %}
22013 ins_encode %{
22014 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22015 int src_vlen_enc = vector_length_encoding(this, $src);
22016 int vlen_enc = vector_length_encoding(this);
22017 switch (to_elem_bt) {
22018 case T_BYTE:
22019 if (!VM_Version::supports_avx512vl()) {
22020 vlen_enc = Assembler::AVX_512bit;
22021 }
22022 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22023 break;
22024 case T_INT:
22025 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22026 break;
22027 case T_FLOAT:
22028 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22029 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22030 break;
22031 case T_LONG:
22032 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22033 break;
22034 case T_DOUBLE: {
22035 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22036 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22037 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22038 break;
22039 }
22040 default:
22041 ShouldNotReachHere();
22042 }
22043 %}
22044 ins_pipe( pipe_slow );
22045 %}
22046
22047 instruct castItoX(vec dst, vec src) %{
22048 predicate(UseAVX <= 2 &&
22049 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22050 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22051 match(Set dst (VectorCastI2X src));
22052 format %{ "vector_cast_i2x $dst,$src" %}
22053 ins_encode %{
22054 assert(UseAVX > 0, "required");
22055
22056 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22057 int vlen_enc = vector_length_encoding(this, $src);
22058
22059 if (to_elem_bt == T_BYTE) {
22060 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22061 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22062 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22063 } else {
22064 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22065 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22066 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22067 }
22068 %}
22069 ins_pipe( pipe_slow );
22070 %}
22071
22072 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22073 predicate(UseAVX <= 2 &&
22074 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22075 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22076 match(Set dst (VectorCastI2X src));
22077 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22078 effect(TEMP dst, TEMP vtmp);
22079 ins_encode %{
22080 assert(UseAVX > 0, "required");
22081
22082 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22083 int vlen_enc = vector_length_encoding(this, $src);
22084
22085 if (to_elem_bt == T_BYTE) {
22086 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22087 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22088 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22089 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22090 } else {
22091 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22092 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22093 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22094 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22095 }
22096 %}
22097 ins_pipe( pipe_slow );
22098 %}
22099
22100 instruct vcastItoX_evex(vec dst, vec src) %{
22101 predicate(UseAVX > 2 ||
22102 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22103 match(Set dst (VectorCastI2X src));
22104 format %{ "vector_cast_i2x $dst,$src\t!" %}
22105 ins_encode %{
22106 assert(UseAVX > 0, "required");
22107
22108 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22109 int src_vlen_enc = vector_length_encoding(this, $src);
22110 int dst_vlen_enc = vector_length_encoding(this);
22111 switch (dst_elem_bt) {
22112 case T_BYTE:
22113 if (!VM_Version::supports_avx512vl()) {
22114 src_vlen_enc = Assembler::AVX_512bit;
22115 }
22116 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22117 break;
22118 case T_SHORT:
22119 if (!VM_Version::supports_avx512vl()) {
22120 src_vlen_enc = Assembler::AVX_512bit;
22121 }
22122 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22123 break;
22124 case T_FLOAT:
22125 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22126 break;
22127 case T_LONG:
22128 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22129 break;
22130 case T_DOUBLE:
22131 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22132 break;
22133 default:
22134 ShouldNotReachHere();
22135 }
22136 %}
22137 ins_pipe( pipe_slow );
22138 %}
22139
22140 instruct vcastLtoBS(vec dst, vec src) %{
22141 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22142 UseAVX <= 2);
22143 match(Set dst (VectorCastL2X src));
22144 format %{ "vector_cast_l2x $dst,$src" %}
22145 ins_encode %{
22146 assert(UseAVX > 0, "required");
22147
22148 int vlen = Matcher::vector_length_in_bytes(this, $src);
22149 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22150 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22151 : ExternalAddress(vector_int_to_short_mask());
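    // Gather the low 32 bits of each long into the low half of the register, then mask to the
    // target element width and pack down to short (and further to byte if requested).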
22152 if (vlen <= 16) {
22153 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22154 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22155 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22156 } else {
22157 assert(vlen <= 32, "required");
22158 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22159 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22160 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22161 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22162 }
22163 if (to_elem_bt == T_BYTE) {
22164 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22165 }
22166 %}
22167 ins_pipe( pipe_slow );
22168 %}
22169
22170 instruct vcastLtoX_evex(vec dst, vec src) %{
22171 predicate(UseAVX > 2 ||
22172 (Matcher::vector_element_basic_type(n) == T_INT ||
22173 Matcher::vector_element_basic_type(n) == T_FLOAT ||
22174 Matcher::vector_element_basic_type(n) == T_DOUBLE));
22175 match(Set dst (VectorCastL2X src));
22176 format %{ "vector_cast_l2x $dst,$src\t!" %}
22177 ins_encode %{
22178 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22179 int vlen = Matcher::vector_length_in_bytes(this, $src);
22180 int vlen_enc = vector_length_encoding(this, $src);
22181 switch (to_elem_bt) {
22182 case T_BYTE:
22183 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22184 vlen_enc = Assembler::AVX_512bit;
22185 }
22186 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22187 break;
22188 case T_SHORT:
22189 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22190 vlen_enc = Assembler::AVX_512bit;
22191 }
22192 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22193 break;
22194 case T_INT:
22195 if (vlen == 8) {
22196 if ($dst$$XMMRegister != $src$$XMMRegister) {
22197 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22198 }
22199 } else if (vlen == 16) {
22200 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22201 } else if (vlen == 32) {
22202 if (UseAVX > 2) {
22203 if (!VM_Version::supports_avx512vl()) {
22204 vlen_enc = Assembler::AVX_512bit;
22205 }
22206 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22207 } else {
22208 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22209 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22210 }
22211 } else { // vlen == 64
22212 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22213 }
22214 break;
22215 case T_FLOAT:
22216 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22217 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22218 break;
22219 case T_DOUBLE:
22220 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22221 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22222 break;
22223
22224 default: assert(false, "%s", type2name(to_elem_bt));
22225 }
22226 %}
22227 ins_pipe( pipe_slow );
22228 %}
22229
22230 instruct vcastFtoD_reg(vec dst, vec src) %{
22231 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22232 match(Set dst (VectorCastF2X src));
22233 format %{ "vector_cast_f2d $dst,$src\t!" %}
22234 ins_encode %{
22235 int vlen_enc = vector_length_encoding(this);
22236 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22237 %}
22238 ins_pipe( pipe_slow );
22239 %}
22240
22242 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22243 predicate(!VM_Version::supports_avx10_2() &&
22244 !VM_Version::supports_avx512vl() &&
22245 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22246 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22247 is_integral_type(Matcher::vector_element_basic_type(n)));
22248 match(Set dst (VectorCastF2X src));
22249 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22250 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22251 ins_encode %{
22252 int vlen_enc = vector_length_encoding(this, $src);
22253 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22254     // JDK-8292878 removed the need for an explicit scratch register when loading addresses wider
22255     // than 32 bits for register-indirect addressing, since stub constants live in the code cache
22256     // and ReservedCodeCacheSize is currently capped at 2G. Targets are free to raise that limit,
22257     // but a code cache larger than 2G is unrealistic in practice. With the cap in place we avoid
22258     // allocating a temporary register, which in the limiting case can prevent spilling in blocks
22259     // with high register pressure.
22261 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22262 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22263 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22264 %}
22265 ins_pipe( pipe_slow );
22266 %}
22267
22268 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22269 predicate(!VM_Version::supports_avx10_2() &&
22270 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22271 is_integral_type(Matcher::vector_element_basic_type(n)));
22272 match(Set dst (VectorCastF2X src));
22273 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22274 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22275 ins_encode %{
22276 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22277 if (to_elem_bt == T_LONG) {
22278 int vlen_enc = vector_length_encoding(this);
22279 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22280 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22281 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22282 } else {
22283 int vlen_enc = vector_length_encoding(this, $src);
22284 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22285 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22286 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22287 }
22288 %}
22289 ins_pipe( pipe_slow );
22290 %}
22291
22292 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22293 predicate(VM_Version::supports_avx10_2() &&
22294 is_integral_type(Matcher::vector_element_basic_type(n)));
22295 match(Set dst (VectorCastF2X src));
22296 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22297 ins_encode %{
22298 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22299 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22300 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22301 %}
22302 ins_pipe( pipe_slow );
22303 %}
22304
22305 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22306 predicate(VM_Version::supports_avx10_2() &&
22307 is_integral_type(Matcher::vector_element_basic_type(n)));
22308 match(Set dst (VectorCastF2X (LoadVector src)));
22309 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22310 ins_encode %{
22311 int vlen = Matcher::vector_length(this);
22312 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22313 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22314 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22315 %}
22316 ins_pipe( pipe_slow );
22317 %}
22318
22319 instruct vcastDtoF_reg(vec dst, vec src) %{
22320 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22321 match(Set dst (VectorCastD2X src));
22322 format %{ "vector_cast_d2x $dst,$src\t!" %}
22323 ins_encode %{
22324 int vlen_enc = vector_length_encoding(this, $src);
22325 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22326 %}
22327 ins_pipe( pipe_slow );
22328 %}
22329
22330 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22331 predicate(!VM_Version::supports_avx10_2() &&
22332 !VM_Version::supports_avx512vl() &&
22333 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22334 is_integral_type(Matcher::vector_element_basic_type(n)));
22335 match(Set dst (VectorCastD2X src));
22336 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22337 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22338 ins_encode %{
22339 int vlen_enc = vector_length_encoding(this, $src);
22340 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22341 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22342 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22343 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22344 %}
22345 ins_pipe( pipe_slow );
22346 %}
22347
22348 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22349 predicate(!VM_Version::supports_avx10_2() &&
22350 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22351 is_integral_type(Matcher::vector_element_basic_type(n)));
22352 match(Set dst (VectorCastD2X src));
22353 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22354 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22355 ins_encode %{
22356 int vlen_enc = vector_length_encoding(this, $src);
22357 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22358 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22359 ExternalAddress(vector_float_signflip());
22360 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22361 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22362 %}
22363 ins_pipe( pipe_slow );
22364 %}
22365
22366 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22367 predicate(VM_Version::supports_avx10_2() &&
22368 is_integral_type(Matcher::vector_element_basic_type(n)));
22369 match(Set dst (VectorCastD2X src));
22370 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22371 ins_encode %{
22372 int vlen_enc = vector_length_encoding(this, $src);
22373 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22374 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22375 %}
22376 ins_pipe( pipe_slow );
22377 %}
22378
22379 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22380 predicate(VM_Version::supports_avx10_2() &&
22381 is_integral_type(Matcher::vector_element_basic_type(n)));
22382 match(Set dst (VectorCastD2X (LoadVector src)));
22383 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22384 ins_encode %{
22385 int vlen = Matcher::vector_length(this);
22386 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22387 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22388 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22389 %}
22390 ins_pipe( pipe_slow );
22391 %}
22392
22393 instruct vucast(vec dst, vec src) %{
22394 match(Set dst (VectorUCastB2X src));
22395 match(Set dst (VectorUCastS2X src));
22396 match(Set dst (VectorUCastI2X src));
22397 format %{ "vector_ucast $dst,$src\t!" %}
22398 ins_encode %{
22399 assert(UseAVX > 0, "required");
22400
22401 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22402 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22403 int vlen_enc = vector_length_encoding(this);
22404 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22405 %}
22406 ins_pipe( pipe_slow );
22407 %}
22408
22409 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22410 predicate(!VM_Version::supports_avx512vl() &&
22411 Matcher::vector_length_in_bytes(n) < 64 &&
22412 Matcher::vector_element_basic_type(n) == T_INT);
22413 match(Set dst (RoundVF src));
22414 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22415 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22416 ins_encode %{
22417 int vlen_enc = vector_length_encoding(this);
22418 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22419 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22420 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22421 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22422 %}
22423 ins_pipe( pipe_slow );
22424 %}
22425
22426 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22427 predicate((VM_Version::supports_avx512vl() ||
22428 Matcher::vector_length_in_bytes(n) == 64) &&
22429 Matcher::vector_element_basic_type(n) == T_INT);
22430 match(Set dst (RoundVF src));
22431 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22432 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22433 ins_encode %{
22434 int vlen_enc = vector_length_encoding(this);
22435 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22436 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22437 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22438 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22439 %}
22440 ins_pipe( pipe_slow );
22441 %}
22442
22443 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22444 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22445 match(Set dst (RoundVD src));
22446 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22447 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22448 ins_encode %{
22449 int vlen_enc = vector_length_encoding(this);
22450 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22451 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22452 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22453 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22454 %}
22455 ins_pipe( pipe_slow );
22456 %}
22457
22458 // --------------------------------- VectorMaskCmp --------------------------------------
22459
22460 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22461 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22462 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22463 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22464 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22465 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22466 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22467 ins_encode %{
22468 int vlen_enc = vector_length_encoding(this, $src1);
22469 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22470 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22471 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22472 } else {
22473 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22474 }
22475 %}
22476 ins_pipe( pipe_slow );
22477 %}
22478
22479 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22480 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22481 n->bottom_type()->isa_vectmask() == nullptr &&
22482 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22483 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22484 effect(TEMP ktmp);
22485 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22486 ins_encode %{
22487 int vlen_enc = Assembler::AVX_512bit;
22488 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22489 KRegister mask = k0; // The comparison itself is not being masked.
22490 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22491 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22492 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22493 } else {
22494 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22495 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22496 }
22497 %}
22498 ins_pipe( pipe_slow );
22499 %}
22500
22501 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22502 predicate(n->bottom_type()->isa_vectmask() &&
22503 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22504 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22505 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22506 ins_encode %{
22507 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22508 int vlen_enc = vector_length_encoding(this, $src1);
22509 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22510 KRegister mask = k0; // The comparison itself is not being masked.
22511 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22512 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22513 } else {
22514 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22515 }
22516 %}
22517 ins_pipe( pipe_slow );
22518 %}
22519
22520 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22521 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22522 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22523 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22524 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22525 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22526 (n->in(2)->get_int() == BoolTest::eq ||
22527 n->in(2)->get_int() == BoolTest::lt ||
22528 n->in(2)->get_int() == BoolTest::gt)); // cond
22529 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22530 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22531 ins_encode %{
22532 int vlen_enc = vector_length_encoding(this, $src1);
22533 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22534 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22535 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22536 %}
22537 ins_pipe( pipe_slow );
22538 %}
22539
22540 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22541 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22542 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22543 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22544 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22545 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22546 (n->in(2)->get_int() == BoolTest::ne ||
22547 n->in(2)->get_int() == BoolTest::le ||
22548 n->in(2)->get_int() == BoolTest::ge)); // cond
22549 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22550 effect(TEMP dst, TEMP xtmp);
22551 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22552 ins_encode %{
22553 int vlen_enc = vector_length_encoding(this, $src1);
22554 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22555 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22556 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22557 %}
22558 ins_pipe( pipe_slow );
22559 %}
22560
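// Unsigned integer compares are not available below AVX-512, so bias both operands by xor-ing in
// the per-element sign bit and use the signed compare: a <u b  <==>  (a ^ signbit) <s (b ^ signbit).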
22561 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22562 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22563 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22564 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22565 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22566 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22567 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22568 effect(TEMP dst, TEMP xtmp);
22569 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22570 ins_encode %{
22571 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22572 int vlen_enc = vector_length_encoding(this, $src1);
22573 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22574 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22575
22576 if (vlen_enc == Assembler::AVX_128bit) {
22577 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22578 } else {
22579 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22580 }
22581 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22582 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22583 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22584 %}
22585 ins_pipe( pipe_slow );
22586 %}
22587
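// For 512-bit integral compares whose result is a vector rather than a TypeVectMask, the
// compare goes into a temporary k-register and the mask is then expanded back into an
// all-ones/all-zeros vector via a masked load of the all-bits-set constant.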
22588 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22589 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22590 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22591 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22592 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22593 effect(TEMP ktmp);
22594 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22595 ins_encode %{
22596 assert(UseAVX > 2, "required");
22597
22598 int vlen_enc = vector_length_encoding(this, $src1);
22599 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22600 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22601 KRegister mask = k0; // The comparison itself is not being masked.
22602 bool merge = false;
22603 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22604
22605 switch (src1_elem_bt) {
22606 case T_INT: {
22607 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22608 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22609 break;
22610 }
22611 case T_LONG: {
22612 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22613 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22614 break;
22615 }
22616 default: assert(false, "%s", type2name(src1_elem_bt));
22617 }
22618 %}
22619 ins_pipe( pipe_slow );
22620 %}
22621
22622
22623 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22624 predicate(n->bottom_type()->isa_vectmask() &&
22625 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22626 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22627 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22628 ins_encode %{
22629 assert(UseAVX > 2, "required");
22630 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22631
22632 int vlen_enc = vector_length_encoding(this, $src1);
22633 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22634 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22635 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22636
22637 // Compare src1 and src2 directly into the destination mask register; the compare itself is unmasked (k0).
22638 switch (src1_elem_bt) {
22639 case T_BYTE: {
22640 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22641 break;
22642 }
22643 case T_SHORT: {
22644 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22645 break;
22646 }
22647 case T_INT: {
22648 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22649 break;
22650 }
22651 case T_LONG: {
22652 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22653 break;
22654 }
22655 default: assert(false, "%s", type2name(src1_elem_bt));
22656 }
22657 %}
22658 ins_pipe( pipe_slow );
22659 %}
22660
22661 // Extract
22662
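// Vectors up to 16 bytes extract the element directly (get_elem); for 256/512-bit vectors the
// 128-bit lane holding the element is first copied into a temporary (get_lane) and the element
// is then extracted from that lane.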
22663 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22664 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22665 match(Set dst (ExtractI src idx));
22666 match(Set dst (ExtractS src idx));
22667 match(Set dst (ExtractB src idx));
22668 format %{ "extractI $dst,$src,$idx\t!" %}
22669 ins_encode %{
22670 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22671
22672 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22673 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22674 %}
22675 ins_pipe( pipe_slow );
22676 %}
22677
22678 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22679 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22680 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22681 match(Set dst (ExtractI src idx));
22682 match(Set dst (ExtractS src idx));
22683 match(Set dst (ExtractB src idx));
22684 effect(TEMP vtmp);
22685 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22686 ins_encode %{
22687 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22688
22689 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22690 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22691 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22692 %}
22693 ins_pipe( pipe_slow );
22694 %}
22695
22696 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22697 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22698 match(Set dst (ExtractL src idx));
22699 format %{ "extractL $dst,$src,$idx\t!" %}
22700 ins_encode %{
22701 assert(UseSSE >= 4, "required");
22702 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22703
22704 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22705 %}
22706 ins_pipe( pipe_slow );
22707 %}
22708
22709 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22710 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22711 Matcher::vector_length(n->in(1)) == 8); // src
22712 match(Set dst (ExtractL src idx));
22713 effect(TEMP vtmp);
22714 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22715 ins_encode %{
22716 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22717
22718 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22719 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22720 %}
22721 ins_pipe( pipe_slow );
22722 %}
22723
22724 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22725 predicate(Matcher::vector_length(n->in(1)) <= 4);
22726 match(Set dst (ExtractF src idx));
22727 effect(TEMP dst, TEMP vtmp);
22728 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22729 ins_encode %{
22730 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22731
22732 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22733 %}
22734 ins_pipe( pipe_slow );
22735 %}
22736
22737 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22738 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22739 Matcher::vector_length(n->in(1)/*src*/) == 16);
22740 match(Set dst (ExtractF src idx));
22741 effect(TEMP vtmp);
22742 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22743 ins_encode %{
22744 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22745
22746 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22747 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22748 %}
22749 ins_pipe( pipe_slow );
22750 %}
22751
22752 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22753 predicate(Matcher::vector_length(n->in(1)) == 2); // src
22754 match(Set dst (ExtractD src idx));
22755 format %{ "extractD $dst,$src,$idx\t!" %}
22756 ins_encode %{
22757 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22758
22759 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22760 %}
22761 ins_pipe( pipe_slow );
22762 %}
22763
22764 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22765 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22766 Matcher::vector_length(n->in(1)) == 8); // src
22767 match(Set dst (ExtractD src idx));
22768 effect(TEMP vtmp);
22769 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22770 ins_encode %{
22771 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22772
22773 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22774 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22775 %}
22776 ins_pipe( pipe_slow );
22777 %}
22778
22779 // --------------------------------- Vector Blend --------------------------------------
22780
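// Pure SSE4.1 blending (pblendvb) implicitly uses xmm0 as the mask, hence the rxmm0 TEMP in
// blendvp. The AVX rules use vpblendvb/vblendvps, the E-core variant synthesizes the blend with
// andn/and/or, and the 512-bit rule first converts the vector mask into a k-register by
// comparing it against all-ones.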
22781 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22782 predicate(UseAVX == 0);
22783 match(Set dst (VectorBlend (Binary dst src) mask));
22784 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
22785 effect(TEMP tmp);
22786 ins_encode %{
22787 assert(UseSSE >= 4, "required");
22788
22789 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22790 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22791 }
22792 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22793 %}
22794 ins_pipe( pipe_slow );
22795 %}
22796
22797 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22798 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22799 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22800 Matcher::vector_length_in_bytes(n) <= 32 &&
22801 is_integral_type(Matcher::vector_element_basic_type(n)));
22802 match(Set dst (VectorBlend (Binary src1 src2) mask));
22803 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22804 ins_encode %{
22805 int vlen_enc = vector_length_encoding(this);
22806 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22807 %}
22808 ins_pipe( pipe_slow );
22809 %}
22810
22811 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22812 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22813 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22814 Matcher::vector_length_in_bytes(n) <= 32 &&
22815 !is_integral_type(Matcher::vector_element_basic_type(n)));
22816 match(Set dst (VectorBlend (Binary src1 src2) mask));
22817 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22818 ins_encode %{
22819 int vlen_enc = vector_length_encoding(this);
22820 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22821 %}
22822 ins_pipe( pipe_slow );
22823 %}
22824
22825 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22826 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22827 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22828 Matcher::vector_length_in_bytes(n) <= 32);
22829 match(Set dst (VectorBlend (Binary src1 src2) mask));
22830 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22831 effect(TEMP vtmp, TEMP dst);
22832 ins_encode %{
22833 int vlen_enc = vector_length_encoding(this);
22834 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22835 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22836 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22837 %}
22838 ins_pipe( pipe_slow );
22839 %}
22840
22841 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22842 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22843 n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22844 match(Set dst (VectorBlend (Binary src1 src2) mask));
22845 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22846 effect(TEMP ktmp);
22847 ins_encode %{
22848 int vlen_enc = Assembler::AVX_512bit;
22849 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22850 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22851 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22852 %}
22853 ins_pipe( pipe_slow );
22854 %}
22855
22856
22857 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22858 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22859 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22860 VM_Version::supports_avx512bw()));
22861 match(Set dst (VectorBlend (Binary src1 src2) mask));
22862 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22863 ins_encode %{
22864 int vlen_enc = vector_length_encoding(this);
22865 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22866 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22867 %}
22868 ins_pipe( pipe_slow );
22869 %}
22870
22871 // --------------------------------- ABS --------------------------------------
22872 // a = |a|
22873 instruct vabsB_reg(vec dst, vec src) %{
22874 match(Set dst (AbsVB src));
22875 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22876 ins_encode %{
22877 uint vlen = Matcher::vector_length(this);
22878 if (vlen <= 16) {
22879 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22880 } else {
22881 int vlen_enc = vector_length_encoding(this);
22882 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22883 }
22884 %}
22885 ins_pipe( pipe_slow );
22886 %}
22887
22888 instruct vabsS_reg(vec dst, vec src) %{
22889 match(Set dst (AbsVS src));
22890 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22891 ins_encode %{
22892 uint vlen = Matcher::vector_length(this);
22893 if (vlen <= 8) {
22894 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22895 } else {
22896 int vlen_enc = vector_length_encoding(this);
22897 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22898 }
22899 %}
22900 ins_pipe( pipe_slow );
22901 %}
22902
22903 instruct vabsI_reg(vec dst, vec src) %{
22904 match(Set dst (AbsVI src));
22905 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22906 ins_encode %{
22907 uint vlen = Matcher::vector_length(this);
22908 if (vlen <= 4) {
22909 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22910 } else {
22911 int vlen_enc = vector_length_encoding(this);
22912 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22913 }
22914 %}
22915 ins_pipe( pipe_slow );
22916 %}
22917
22918 instruct vabsL_reg(vec dst, vec src) %{
22919 match(Set dst (AbsVL src));
22920 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22921 ins_encode %{
22922 assert(UseAVX > 2, "required");
22923 int vlen_enc = vector_length_encoding(this);
22924 if (!VM_Version::supports_avx512vl()) {
22925 vlen_enc = Assembler::AVX_512bit;
22926 }
22927 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22928 %}
22929 ins_pipe( pipe_slow );
22930 %}
22931
22932 // --------------------------------- ABSNEG --------------------------------------
22933
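// AbsV/NegV on floats and doubles are implemented by and-ing/xor-ing with a sign-bit mask
// constant; the "[mask]" in the format strings refers to that constant operand.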
22934 instruct vabsnegF(vec dst, vec src) %{
22935 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22936 match(Set dst (AbsVF src));
22937 match(Set dst (NegVF src));
22938 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22939 ins_cost(150);
22940 ins_encode %{
22941 int opcode = this->ideal_Opcode();
22942 int vlen = Matcher::vector_length(this);
22943 if (vlen == 2) {
22944 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22945 } else {
22946 assert(vlen == 8 || vlen == 16, "required");
22947 int vlen_enc = vector_length_encoding(this);
22948 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22949 }
22950 %}
22951 ins_pipe( pipe_slow );
22952 %}
22953
22954 instruct vabsneg4F(vec dst) %{
22955 predicate(Matcher::vector_length(n) == 4);
22956 match(Set dst (AbsVF dst));
22957 match(Set dst (NegVF dst));
22958 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22959 ins_cost(150);
22960 ins_encode %{
22961 int opcode = this->ideal_Opcode();
22962 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22963 %}
22964 ins_pipe( pipe_slow );
22965 %}
22966
22967 instruct vabsnegD(vec dst, vec src) %{
22968 match(Set dst (AbsVD src));
22969 match(Set dst (NegVD src));
22970 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22971 ins_encode %{
22972 int opcode = this->ideal_Opcode();
22973 uint vlen = Matcher::vector_length(this);
22974 if (vlen == 2) {
22975 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22976 } else {
22977 int vlen_enc = vector_length_encoding(this);
22978 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22979 }
22980 %}
22981 ins_pipe( pipe_slow );
22982 %}
22983
22984 //------------------------------------- VectorTest --------------------------------------------
22985
22986 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22987 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22988 match(Set cr (VectorTest src1 src2));
22989 effect(TEMP vtmp);
22990 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
22991 ins_encode %{
22992 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22993 int vlen = Matcher::vector_length_in_bytes(this, $src1);
22994 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22995 %}
22996 ins_pipe( pipe_slow );
22997 %}
22998
22999 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23000 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23001 match(Set cr (VectorTest src1 src2));
23002 format %{ "vptest_ge16 $src1, $src2\n\t" %}
23003 ins_encode %{
23004 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23005 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23006 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23007 %}
23008 ins_pipe( pipe_slow );
23009 %}
23010
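// For mask lengths below the native kortest width the k-register is moved to a GPR and the bits
// beyond masklen are cleared with an AND; "all true" is then a compare against
// (1 << masklen) - 1, while "any true" simply relies on the flags set by the AND.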
23011 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23012 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23013 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23014 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23015 match(Set cr (VectorTest src1 src2));
23016 effect(TEMP tmp);
23017 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23018 ins_encode %{
23019 uint masklen = Matcher::vector_length(this, $src1);
23020 __ kmovwl($tmp$$Register, $src1$$KRegister);
23021 __ andl($tmp$$Register, (1 << masklen) - 1);
23022 __ cmpl($tmp$$Register, (1 << masklen) - 1);
23023 %}
23024 ins_pipe( pipe_slow );
23025 %}
23026
23027 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23028 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23029 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23030 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23031 match(Set cr (VectorTest src1 src2));
23032 effect(TEMP tmp);
23033 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23034 ins_encode %{
23035 uint masklen = Matcher::vector_length(this, $src1);
23036 __ kmovwl($tmp$$Register, $src1$$KRegister);
23037 __ andl($tmp$$Register, (1 << masklen) - 1);
23038 %}
23039 ins_pipe( pipe_slow );
23040 %}
23041
23042 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23043 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23044 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23045 match(Set cr (VectorTest src1 src2));
23046 format %{ "ktest_ge8 $src1, $src2\n\t" %}
23047 ins_encode %{
23048 uint masklen = Matcher::vector_length(this, $src1);
23049 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23050 %}
23051 ins_pipe( pipe_slow );
23052 %}
23053
23054 //------------------------------------- LoadMask --------------------------------------------
23055
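// VectorLoadMask converts a boolean vector (one 0/1 byte per element) into the internal mask
// representation: an all-zeros/all-ones vector when the node has no TypeVectMask, or a
// k-register when it does.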
23056 instruct loadMask(legVec dst, legVec src) %{
23057 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23058 match(Set dst (VectorLoadMask src));
23059 effect(TEMP dst);
23060 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23061 ins_encode %{
23062 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23063 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23064 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23065 %}
23066 ins_pipe( pipe_slow );
23067 %}
23068
23069 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23070 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23071 match(Set dst (VectorLoadMask src));
23072 effect(TEMP xtmp);
23073 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23074 ins_encode %{
23075 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23076 true, Assembler::AVX_512bit);
23077 %}
23078 ins_pipe( pipe_slow );
23079 %}
23080
23081 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23082 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23083 match(Set dst (VectorLoadMask src));
23084 effect(TEMP xtmp);
23085 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23086 ins_encode %{
23087 int vlen_enc = vector_length_encoding(in(1));
23088 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23089 false, vlen_enc);
23090 %}
23091 ins_pipe( pipe_slow );
23092 %}
23093
23094 //------------------------------------- StoreMask --------------------------------------------
23095
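// VectorStoreMask is the inverse of VectorLoadMask: it narrows an element-wide 0/-1 mask down
// to one byte per element and turns -1 into 1 (the trailing pabsb/vpabsb in each rule).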
23096 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23097 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23098 match(Set dst (VectorStoreMask src size));
23099 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23100 ins_encode %{
23101 int vlen = Matcher::vector_length(this);
23102 if (vlen <= 16 && UseAVX <= 2) {
23103 assert(UseSSE >= 3, "required");
23104 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23105 } else {
23106 assert(UseAVX > 0, "required");
23107 int src_vlen_enc = vector_length_encoding(this, $src);
23108 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23109 }
23110 %}
23111 ins_pipe( pipe_slow );
23112 %}
23113
23114 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23115 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23116 match(Set dst (VectorStoreMask src size));
23117 effect(TEMP_DEF dst, TEMP xtmp);
23118 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23119 ins_encode %{
23120 int vlen_enc = Assembler::AVX_128bit;
23121 int vlen = Matcher::vector_length(this);
23122 if (vlen <= 8) {
23123 assert(UseSSE >= 3, "required");
23124 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23125 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23126 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23127 } else {
23128 assert(UseAVX > 0, "required");
23129 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23130 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23131 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23132 }
23133 %}
23134 ins_pipe( pipe_slow );
23135 %}
23136
23137 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23138 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23139 match(Set dst (VectorStoreMask src size));
23140 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23141 effect(TEMP_DEF dst, TEMP xtmp);
23142 ins_encode %{
23143 int vlen_enc = Assembler::AVX_128bit;
23144 int vlen = Matcher::vector_length(this);
23145 if (vlen <= 4) {
23146 assert(UseSSE >= 3, "required");
23147 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23148 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23149 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23150 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23151 } else {
23152 assert(UseAVX > 0, "required");
23153 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23154 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23155 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23156 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23157 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23158 }
23159 %}
23160 ins_pipe( pipe_slow );
23161 %}
23162
23163 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23164 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23165 match(Set dst (VectorStoreMask src size));
23166 effect(TEMP_DEF dst, TEMP xtmp);
23167 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23168 ins_encode %{
23169 assert(UseSSE >= 3, "required");
23170 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23171 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23172 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23173 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23174 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23175 %}
23176 ins_pipe( pipe_slow );
23177 %}
23178
23179 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23180 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23181 match(Set dst (VectorStoreMask src size));
23182 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23183 effect(TEMP_DEF dst, TEMP vtmp);
23184 ins_encode %{
23185 int vlen_enc = Assembler::AVX_128bit;
23186 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23187 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23188 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23189 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23190 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23191 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23192 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23193 %}
23194 ins_pipe( pipe_slow );
23195 %}
23196
23197 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23198 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23199 match(Set dst (VectorStoreMask src size));
23200 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23201 ins_encode %{
23202 int src_vlen_enc = vector_length_encoding(this, $src);
23203 int dst_vlen_enc = vector_length_encoding(this);
23204 if (!VM_Version::supports_avx512vl()) {
23205 src_vlen_enc = Assembler::AVX_512bit;
23206 }
23207 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23208 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23209 %}
23210 ins_pipe( pipe_slow );
23211 %}
23212
23213 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23214 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23215 match(Set dst (VectorStoreMask src size));
23216 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23217 ins_encode %{
23218 int src_vlen_enc = vector_length_encoding(this, $src);
23219 int dst_vlen_enc = vector_length_encoding(this);
23220 if (!VM_Version::supports_avx512vl()) {
23221 src_vlen_enc = Assembler::AVX_512bit;
23222 }
23223 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23224 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23225 %}
23226 ins_pipe( pipe_slow );
23227 %}
23228
23229 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23230 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23231 match(Set dst (VectorStoreMask mask size));
23232 effect(TEMP_DEF dst);
23233 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23234 ins_encode %{
23235 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23236 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23237 false, Assembler::AVX_512bit, noreg);
23238 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23239 %}
23240 ins_pipe( pipe_slow );
23241 %}
23242
23243 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23244 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23245 match(Set dst (VectorStoreMask mask size));
23246 effect(TEMP_DEF dst);
23247 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23248 ins_encode %{
23249 int dst_vlen_enc = vector_length_encoding(this);
23250 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23251 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23252 %}
23253 ins_pipe( pipe_slow );
23254 %}
23255
23256 instruct vmaskcast_evex(kReg dst) %{
23257 match(Set dst (VectorMaskCast dst));
23258 ins_cost(0);
23259 format %{ "vector_mask_cast $dst" %}
23260 ins_encode %{
23261 // empty
23262 %}
23263 ins_pipe(empty);
23264 %}
23265
23266 instruct vmaskcast(vec dst) %{
23267 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23268 match(Set dst (VectorMaskCast dst));
23269 ins_cost(0);
23270 format %{ "vector_mask_cast $dst" %}
23271 ins_encode %{
23272 // empty
23273 %}
23274 ins_pipe(empty);
23275 %}
23276
23277 instruct vmaskcast_avx(vec dst, vec src) %{
23278 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23279 match(Set dst (VectorMaskCast src));
23280 format %{ "vector_mask_cast $dst, $src" %}
23281 ins_encode %{
23282 int vlen = Matcher::vector_length(this);
23283 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23284 BasicType dst_bt = Matcher::vector_element_basic_type(this);
23285 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23286 %}
23287 ins_pipe(pipe_slow);
23288 %}
23289
23290 //-------------------------------- Load Iota Indices ----------------------------------
23291
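// PopulateIndex below computes dst[i] = src1 + i (the stride src2 is constrained to 1, see the
// assert) by broadcasting src1 and adding the iota constant to it.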
23292 instruct loadIotaIndices(vec dst, immI_0 src) %{
23293 match(Set dst (VectorLoadConst src));
23294 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23295 ins_encode %{
23296 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23297 BasicType bt = Matcher::vector_element_basic_type(this);
23298 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23299 %}
23300 ins_pipe( pipe_slow );
23301 %}
23302
23303 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23304 match(Set dst (PopulateIndex src1 src2));
23305 effect(TEMP dst, TEMP vtmp);
23306 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23307 ins_encode %{
23308 assert($src2$$constant == 1, "required");
23309 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23310 int vlen_enc = vector_length_encoding(this);
23311 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23312 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23313 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23314 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23315 %}
23316 ins_pipe( pipe_slow );
23317 %}
23318
23319 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23320 match(Set dst (PopulateIndex src1 src2));
23321 effect(TEMP dst, TEMP vtmp);
23322 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23323 ins_encode %{
23324 assert($src2$$constant == 1, "required");
23325 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23326 int vlen_enc = vector_length_encoding(this);
23327 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23328 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23329 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23330 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23331 %}
23332 ins_pipe( pipe_slow );
23333 %}
23334
23335 //-------------------------------- Rearrange ----------------------------------
23336
23337 // LoadShuffle/Rearrange for Byte
23338 instruct rearrangeB(vec dst, vec shuffle) %{
23339 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23340 Matcher::vector_length(n) < 32);
23341 match(Set dst (VectorRearrange dst shuffle));
23342 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23343 ins_encode %{
23344 assert(UseSSE >= 4, "required");
23345 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23346 %}
23347 ins_pipe( pipe_slow );
23348 %}
23349
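// vpshufb cannot move bytes across 128-bit lanes, so the 256-bit byte rearrange shuffles both
// the original and the lane-swapped source and blends the two results using a mask derived from
// the shuffle indices (indices >= 16 select the other lane).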
23350 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23351 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23352 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23353 match(Set dst (VectorRearrange src shuffle));
23354 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23355 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23356 ins_encode %{
23357 assert(UseAVX >= 2, "required");
23358 // Swap src into vtmp1
23359 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23360 // Shuffle swapped src to get entries from other 128 bit lane
23361 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23362 // Shuffle original src to get entries from self 128 bit lane
23363 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23364 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23365 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23366 // Perform the blend
23367 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23368 %}
23369 ins_pipe( pipe_slow );
23370 %}
23371
23372
23373 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23374 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23375 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23376 match(Set dst (VectorRearrange src shuffle));
23377 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23378 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23379 ins_encode %{
23380 int vlen_enc = vector_length_encoding(this);
23381 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23382 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23383 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23384 %}
23385 ins_pipe( pipe_slow );
23386 %}
23387
23388 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23389 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23390 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23391 match(Set dst (VectorRearrange src shuffle));
23392 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23393 ins_encode %{
23394 int vlen_enc = vector_length_encoding(this);
23395 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23396 %}
23397 ins_pipe( pipe_slow );
23398 %}
23399
23400 // LoadShuffle/Rearrange for Short
23401
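// When no native 16-bit shuffle is available, the short shuffle indices are converted to byte
// indices for pshufb: each index is scaled by 2, replicated into both byte positions and then
// offset with {0,1}; e.g. a short index of 3 becomes the byte index pair {6,7}.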
23402 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23403 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23404 !VM_Version::supports_avx512bw());
23405 match(Set dst (VectorLoadShuffle src));
23406 effect(TEMP dst, TEMP vtmp);
23407 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23408 ins_encode %{
23409 // Create a byte shuffle mask from the short shuffle mask;
23410 // only a byte shuffle instruction (pshufb) is available on these platforms.
23411 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23412 if (UseAVX == 0) {
23413 assert(vlen_in_bytes <= 16, "required");
23414 // Multiply each shuffle by two to get byte index
23415 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23416 __ psllw($vtmp$$XMMRegister, 1);
23417
23418 // Duplicate to create 2 copies of byte index
23419 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23420 __ psllw($dst$$XMMRegister, 8);
23421 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23422
23423 // Add one to get alternate byte index
23424 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23425 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23426 } else {
23427 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23428 int vlen_enc = vector_length_encoding(this);
23429 // Multiply each shuffle by two to get byte index
23430 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23431
23432 // Duplicate to create 2 copies of byte index
23433 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23434 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23435
23436 // Add one to get alternate byte index
23437 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23438 }
23439 %}
23440 ins_pipe( pipe_slow );
23441 %}
23442
23443 instruct rearrangeS(vec dst, vec shuffle) %{
23444 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23445 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23446 match(Set dst (VectorRearrange dst shuffle));
23447 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23448 ins_encode %{
23449 assert(UseSSE >= 4, "required");
23450 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23451 %}
23452 ins_pipe( pipe_slow );
23453 %}
23454
23455 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23456 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23457 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23458 match(Set dst (VectorRearrange src shuffle));
23459 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23460 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23461 ins_encode %{
23462 assert(UseAVX >= 2, "required");
23463 // Swap src into vtmp1
23464 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23465 // Shuffle swapped src to get entries from other 128 bit lane
23466 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23467 // Shuffle original src to get entries from self 128 bit lane
23468 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23469 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23470 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23471 // Perform the blend
23472 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23473 %}
23474 ins_pipe( pipe_slow );
23475 %}
23476
23477 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23478 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23479 VM_Version::supports_avx512bw());
23480 match(Set dst (VectorRearrange src shuffle));
23481 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23482 ins_encode %{
23483 int vlen_enc = vector_length_encoding(this);
23484 if (!VM_Version::supports_avx512vl()) {
23485 vlen_enc = Assembler::AVX_512bit;
23486 }
23487 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23488 %}
23489 ins_pipe( pipe_slow );
23490 %}
23491
23492 // LoadShuffle/Rearrange for Integer and Float
23493
23494 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23495 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23496 Matcher::vector_length(n) == 4 && UseAVX == 0);
23497 match(Set dst (VectorLoadShuffle src));
23498 effect(TEMP dst, TEMP vtmp);
23499 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23500 ins_encode %{
23501 assert(UseSSE >= 4, "required");
23502
23503 // Create a byte shuffle mask from the int shuffle mask;
23504 // only a byte shuffle instruction (pshufb) is available on these platforms.
23505
23506 // Duplicate and multiply each shuffle by 4
23507 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23508 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23509 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23510 __ psllw($vtmp$$XMMRegister, 2);
23511
23512 // Duplicate again to create 4 copies of byte index
23513 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23514 __ psllw($dst$$XMMRegister, 8);
23515 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23516
23517 // Add 3,2,1,0 to get alternate byte index
23518 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23519 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23520 %}
23521 ins_pipe( pipe_slow );
23522 %}
23523
23524 instruct rearrangeI(vec dst, vec shuffle) %{
23525 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23526 UseAVX == 0);
23527 match(Set dst (VectorRearrange dst shuffle));
23528 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23529 ins_encode %{
23530 assert(UseSSE >= 4, "required");
23531 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23532 %}
23533 ins_pipe( pipe_slow );
23534 %}
23535
23536 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23537 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23538 UseAVX > 0);
23539 match(Set dst (VectorRearrange src shuffle));
23540 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23541 ins_encode %{
23542 int vlen_enc = vector_length_encoding(this);
23543 BasicType bt = Matcher::vector_element_basic_type(this);
23544 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23545 %}
23546 ins_pipe( pipe_slow );
23547 %}
23548
23549 // LoadShuffle/Rearrange for Long and Double
23550
23551 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23552 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23553 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23554 match(Set dst (VectorLoadShuffle src));
23555 effect(TEMP dst, TEMP vtmp);
23556 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23557 ins_encode %{
23558 assert(UseAVX >= 2, "required");
23559
23560 int vlen_enc = vector_length_encoding(this);
23561 // Create a double word shuffle mask from the long shuffle mask;
23562 // only a double word shuffle instruction (vpermd) is available on these platforms.
23563
23564 // Multiply each shuffle by two to get double word index
23565 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23566
23567 // Duplicate each double word shuffle
23568 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23569 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23570
23571 // Add one to get alternate double word index
23572 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23573 %}
23574 ins_pipe( pipe_slow );
23575 %}
23576
23577 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23578 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23579 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23580 match(Set dst (VectorRearrange src shuffle));
23581 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23582 ins_encode %{
23583 assert(UseAVX >= 2, "required");
23584
23585 int vlen_enc = vector_length_encoding(this);
23586 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23587 %}
23588 ins_pipe( pipe_slow );
23589 %}
23590
23591 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23592 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23593 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23594 match(Set dst (VectorRearrange src shuffle));
23595 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23596 ins_encode %{
23597 assert(UseAVX > 2, "required");
23598
23599 int vlen_enc = vector_length_encoding(this);
23600 if (vlen_enc == Assembler::AVX_128bit) {
23601 vlen_enc = Assembler::AVX_256bit;
23602 }
23603 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23604 %}
23605 ins_pipe( pipe_slow );
23606 %}
23607
23608 // --------------------------------- FMA --------------------------------------
23609 // a * b + c
23610
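// The destination c is also the addend, so each rule matches Set c (FmaV* c (Binary a b)) and
// emits a single fused multiply-add (vfmadd231-style); code generation is guarded by UseFMA.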
23611 instruct vfmaF_reg(vec a, vec b, vec c) %{
23612 match(Set c (FmaVF c (Binary a b)));
23613 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23614 ins_cost(150);
23615 ins_encode %{
23616 assert(UseFMA, "not enabled");
23617 int vlen_enc = vector_length_encoding(this);
23618 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23619 %}
23620 ins_pipe( pipe_slow );
23621 %}
23622
23623 instruct vfmaF_mem(vec a, memory b, vec c) %{
23624 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23625 match(Set c (FmaVF c (Binary a (LoadVector b))));
23626 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23627 ins_cost(150);
23628 ins_encode %{
23629 assert(UseFMA, "not enabled");
23630 int vlen_enc = vector_length_encoding(this);
23631 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23632 %}
23633 ins_pipe( pipe_slow );
23634 %}
23635
23636 instruct vfmaD_reg(vec a, vec b, vec c) %{
23637 match(Set c (FmaVD c (Binary a b)));
23638 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23639 ins_cost(150);
23640 ins_encode %{
23641 assert(UseFMA, "not enabled");
23642 int vlen_enc = vector_length_encoding(this);
23643 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23644 %}
23645 ins_pipe( pipe_slow );
23646 %}
23647
23648 instruct vfmaD_mem(vec a, memory b, vec c) %{
23649 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23650 match(Set c (FmaVD c (Binary a (LoadVector b))));
23651 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23652 ins_cost(150);
23653 ins_encode %{
23654 assert(UseFMA, "not enabled");
23655 int vlen_enc = vector_length_encoding(this);
23656 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23657 %}
23658 ins_pipe( pipe_slow );
23659 %}
23660
23661 // --------------------------------- Vector Multiply Add --------------------------------------
23662
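// MulAddVS2VI maps to pmaddwd/vpmaddwd: adjacent pairs of signed 16-bit elements are multiplied
// and each pair of products is summed into one 32-bit result.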
23663 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23664 predicate(UseAVX == 0);
23665 match(Set dst (MulAddVS2VI dst src1));
23666 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23667 ins_encode %{
23668 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23669 %}
23670 ins_pipe( pipe_slow );
23671 %}
23672
23673 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23674 predicate(UseAVX > 0);
23675 match(Set dst (MulAddVS2VI src1 src2));
23676 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23677 ins_encode %{
23678 int vlen_enc = vector_length_encoding(this);
23679 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23680 %}
23681 ins_pipe( pipe_slow );
23682 %}
23683
23684 // --------------------------------- Vector Multiply Add Add ----------------------------------
23685
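// With AVX512_VNNI the pmaddwd + vpaddd pair is fused into a single evpdpwssd
// (dot-product-accumulate); the low ins_cost makes the matcher prefer this rule.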
23686 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23687 predicate(VM_Version::supports_avx512_vnni());
23688 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23689 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23690 ins_encode %{
23691 assert(UseAVX > 2, "required");
23692 int vlen_enc = vector_length_encoding(this);
23693 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23694 %}
23695 ins_pipe( pipe_slow );
23696 ins_cost(10);
23697 %}
23698
23699 // --------------------------------- PopCount --------------------------------------
23700
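// When the target has native vector popcount for the element type (see
// is_vector_popcount_predicate) the EVEX rules use it directly; otherwise the AVX fallback
// computes the count in vector_popcount_integral, essentially a pshufb-driven nibble lookup.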
23701 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23702 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23703 match(Set dst (PopCountVI src));
23704 match(Set dst (PopCountVL src));
23705 format %{ "vector_popcount_integral $dst, $src" %}
23706 ins_encode %{
23707 int opcode = this->ideal_Opcode();
23708 int vlen_enc = vector_length_encoding(this, $src);
23709 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23710 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23711 %}
23712 ins_pipe( pipe_slow );
23713 %}
23714
23715 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23716 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23717 match(Set dst (PopCountVI src mask));
23718 match(Set dst (PopCountVL src mask));
23719 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23720 ins_encode %{
23721 int vlen_enc = vector_length_encoding(this, $src);
23722 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23723 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23724 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23725 %}
23726 ins_pipe( pipe_slow );
23727 %}
23728
23729 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23730 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23731 match(Set dst (PopCountVI src));
23732 match(Set dst (PopCountVL src));
23733 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23734 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23735 ins_encode %{
23736 int opcode = this->ideal_Opcode();
23737 int vlen_enc = vector_length_encoding(this, $src);
23738 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23739 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23740 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23741 %}
23742 ins_pipe( pipe_slow );
23743 %}
23744
23745 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23746
23747 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23748 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23749 Matcher::vector_length_in_bytes(n->in(1))));
23750 match(Set dst (CountTrailingZerosV src));
23751 effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23752 ins_cost(400);
23753 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23754 ins_encode %{
23755 int vlen_enc = vector_length_encoding(this, $src);
23756 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23757 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23758 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23759 %}
23760 ins_pipe( pipe_slow );
23761 %}
23762
23763 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23764 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23765 VM_Version::supports_avx512cd() &&
23766 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23767 match(Set dst (CountTrailingZerosV src));
23768 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23769 ins_cost(400);
23770 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23771 ins_encode %{
23772 int vlen_enc = vector_length_encoding(this, $src);
23773 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23774 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23775 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23776 %}
23777 ins_pipe( pipe_slow );
23778 %}
23779
23780 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23781 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23782 match(Set dst (CountTrailingZerosV src));
23783 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23784 ins_cost(400);
23785 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23786 ins_encode %{
23787 int vlen_enc = vector_length_encoding(this, $src);
23788 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23789 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23790 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23791 $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23792 %}
23793 ins_pipe( pipe_slow );
23794 %}
23795
23796 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23797 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23798 match(Set dst (CountTrailingZerosV src));
23799 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23800 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23801 ins_encode %{
23802 int vlen_enc = vector_length_encoding(this, $src);
23803 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23804 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23805 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23806 %}
23807 ins_pipe( pipe_slow );
23808 %}
23809
23810
23811 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23812
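// $func is an 8-bit truth table over the three inputs: bit (a*4 + b*2 + c) of $func gives the
// result for that input combination, e.g. 0x96 is a three-way XOR and 0xE8 a majority vote.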
23813 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23814 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23815 effect(TEMP dst);
23816 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23817 ins_encode %{
23818 int vector_len = vector_length_encoding(this);
23819 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23820 %}
23821 ins_pipe( pipe_slow );
23822 %}
23823
23824 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23825 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23826 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23827 effect(TEMP dst);
23828 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23829 ins_encode %{
23830 int vector_len = vector_length_encoding(this);
23831 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23832 %}
23833 ins_pipe( pipe_slow );
23834 %}
23835
23836 // --------------------------------- Rotation Operations ----------------------------------
23837 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23838 match(Set dst (RotateLeftV src shift));
23839 match(Set dst (RotateRightV src shift));
23840 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23841 ins_encode %{
23842 int opcode = this->ideal_Opcode();
23843 int vector_len = vector_length_encoding(this);
23844 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23845 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23846 %}
23847 ins_pipe( pipe_slow );
23848 %}
23849
23850 instruct vprorate(vec dst, vec src, vec shift) %{
23851 match(Set dst (RotateLeftV src shift));
23852 match(Set dst (RotateRightV src shift));
23853 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23854 ins_encode %{
23855 int opcode = this->ideal_Opcode();
23856 int vector_len = vector_length_encoding(this);
23857 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23858 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23859 %}
23860 ins_pipe( pipe_slow );
23861 %}
23862
23863 // ---------------------------------- Masked Operations ------------------------------------
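// Masked loads and stores come in two flavours: the AVX forms take the mask in a vector
// register, while the EVEX forms take it in an opmask (k) register.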
23864 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23865 predicate(!n->in(3)->bottom_type()->isa_vectmask());
23866 match(Set dst (LoadVectorMasked mem mask));
23867 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23868 ins_encode %{
23869 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23870 int vlen_enc = vector_length_encoding(this);
23871 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23872 %}
23873 ins_pipe( pipe_slow );
23874 %}
23875
23876
23877 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23878 predicate(n->in(3)->bottom_type()->isa_vectmask());
23879 match(Set dst (LoadVectorMasked mem mask));
23880 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23881 ins_encode %{
23882 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23883 int vector_len = vector_length_encoding(this);
23884 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23885 %}
23886 ins_pipe( pipe_slow );
23887 %}
23888
23889 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23890 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23891 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23892 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23893 ins_encode %{
23894 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23895 int vlen_enc = vector_length_encoding(src_node);
23896 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23897 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23898 %}
23899 ins_pipe( pipe_slow );
23900 %}
23901
23902 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23903 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23904 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23905 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23906 ins_encode %{
23907 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23908 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23909 int vlen_enc = vector_length_encoding(src_node);
23910 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23911 %}
23912 ins_pipe( pipe_slow );
23913 %}
23914
23915 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23916 match(Set addr (VerifyVectorAlignment addr mask));
23917 effect(KILL cr);
23918 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23919 ins_encode %{
23920 Label Lskip;
23921 // check if masked bits of addr are zero
23922 __ testq($addr$$Register, $mask$$constant);
23923 __ jccb(Assembler::equal, Lskip);
23924 __ stop("verify_vector_alignment found a misaligned vector memory access");
23925 __ bind(Lskip);
23926 %}
23927 ins_pipe(pipe_slow);
23928 %}
23929
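// VectorCmpMasked compares the lanes of $src1 and $src2 selected by $mask and produces -1
// when every selected lane is equal, otherwise the index of the first mismatching lane
// (derived from the comparison mask via knot/kortest/tzcnt).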
23930 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23931 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23932 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23933 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23934 ins_encode %{
23935 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23936 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23937
23938 Label DONE;
23939 int vlen_enc = vector_length_encoding(this, $src1);
23940 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23941
23942 __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23943 __ mov64($dst$$Register, -1L);
23944 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23945 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23946 __ jccb(Assembler::carrySet, DONE);
23947 __ kmovql($dst$$Register, $ktmp1$$KRegister);
23948 __ notq($dst$$Register);
23949 __ tzcntq($dst$$Register, $dst$$Register);
23950 __ bind(DONE);
23951 %}
23952 ins_pipe( pipe_slow );
23953 %}
23954
23955
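// VectorMaskGen materialises an opmask register whose low $len bits are set; the immediate
// variant below computes the bit pattern directly with a shift.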
23956 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23957 match(Set dst (VectorMaskGen len));
23958 effect(TEMP temp, KILL cr);
23959 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23960 ins_encode %{
23961 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23962 %}
23963 ins_pipe( pipe_slow );
23964 %}
23965
23966 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23967 match(Set dst (VectorMaskGen len));
23968 format %{ "vector_mask_gen $len \t! vector mask generator" %}
23969 effect(TEMP temp);
23970 ins_encode %{
23971     __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23972 __ kmovql($dst$$KRegister, $temp$$Register);
23973 %}
23974 ins_pipe( pipe_slow );
23975 %}
23976
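// The mask query operations (VectorMaskToLong, TrueCount, FirstTrue, LastTrue) come in three
// shapes, depending on how the mask is represented: a true EVEX opmask, a boolean vector, or
// a (VectorStoreMask mask size) subtree; all of them funnel into vector_mask_operation.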
23977 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23978 predicate(n->in(1)->bottom_type()->isa_vectmask());
23979 match(Set dst (VectorMaskToLong mask));
23980 effect(TEMP dst, KILL cr);
23981 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23982 ins_encode %{
23983 int opcode = this->ideal_Opcode();
23984 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23985 int mask_len = Matcher::vector_length(this, $mask);
23986 int mask_size = mask_len * type2aelembytes(mbt);
23987 int vlen_enc = vector_length_encoding(this, $mask);
23988 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23989 $dst$$Register, mask_len, mask_size, vlen_enc);
23990 %}
23991 ins_pipe( pipe_slow );
23992 %}
23993
23994 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
23995 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23996 match(Set dst (VectorMaskToLong mask));
23997 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
23998 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23999 ins_encode %{
24000 int opcode = this->ideal_Opcode();
24001 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24002 int mask_len = Matcher::vector_length(this, $mask);
24003 int vlen_enc = vector_length_encoding(this, $mask);
24004 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24005 $dst$$Register, mask_len, mbt, vlen_enc);
24006 %}
24007 ins_pipe( pipe_slow );
24008 %}
24009
24010 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24011 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24012 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24013 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24014 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24015 ins_encode %{
24016 int opcode = this->ideal_Opcode();
24017 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24018 int mask_len = Matcher::vector_length(this, $mask);
24019 int vlen_enc = vector_length_encoding(this, $mask);
24020 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24021 $dst$$Register, mask_len, mbt, vlen_enc);
24022 %}
24023 ins_pipe( pipe_slow );
24024 %}
24025
24026 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24027 predicate(n->in(1)->bottom_type()->isa_vectmask());
24028 match(Set dst (VectorMaskTrueCount mask));
24029 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24030 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24031 ins_encode %{
24032 int opcode = this->ideal_Opcode();
24033 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24034 int mask_len = Matcher::vector_length(this, $mask);
24035 int mask_size = mask_len * type2aelembytes(mbt);
24036 int vlen_enc = vector_length_encoding(this, $mask);
24037 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24038 $tmp$$Register, mask_len, mask_size, vlen_enc);
24039 %}
24040 ins_pipe( pipe_slow );
24041 %}
24042
24043 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24044 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24045 match(Set dst (VectorMaskTrueCount mask));
24046 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24047 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24048 ins_encode %{
24049 int opcode = this->ideal_Opcode();
24050 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24051 int mask_len = Matcher::vector_length(this, $mask);
24052 int vlen_enc = vector_length_encoding(this, $mask);
24053 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24054 $tmp$$Register, mask_len, mbt, vlen_enc);
24055 %}
24056 ins_pipe( pipe_slow );
24057 %}
24058
24059 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24060 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24061 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24062 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24063 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24064 ins_encode %{
24065 int opcode = this->ideal_Opcode();
24066 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24067 int mask_len = Matcher::vector_length(this, $mask);
24068 int vlen_enc = vector_length_encoding(this, $mask);
24069 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24070 $tmp$$Register, mask_len, mbt, vlen_enc);
24071 %}
24072 ins_pipe( pipe_slow );
24073 %}
24074
24075 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24076 predicate(n->in(1)->bottom_type()->isa_vectmask());
24077 match(Set dst (VectorMaskFirstTrue mask));
24078 match(Set dst (VectorMaskLastTrue mask));
24079 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24080 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24081 ins_encode %{
24082 int opcode = this->ideal_Opcode();
24083 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24084 int mask_len = Matcher::vector_length(this, $mask);
24085 int mask_size = mask_len * type2aelembytes(mbt);
24086 int vlen_enc = vector_length_encoding(this, $mask);
24087 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24088 $tmp$$Register, mask_len, mask_size, vlen_enc);
24089 %}
24090 ins_pipe( pipe_slow );
24091 %}
24092
24093 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24094 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24095 match(Set dst (VectorMaskFirstTrue mask));
24096 match(Set dst (VectorMaskLastTrue mask));
24097 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24098 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24099 ins_encode %{
24100 int opcode = this->ideal_Opcode();
24101 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24102 int mask_len = Matcher::vector_length(this, $mask);
24103 int vlen_enc = vector_length_encoding(this, $mask);
24104 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24105 $tmp$$Register, mask_len, mbt, vlen_enc);
24106 %}
24107 ins_pipe( pipe_slow );
24108 %}
24109
24110 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24111 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24112 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24113 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24114 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24115 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24116 ins_encode %{
24117 int opcode = this->ideal_Opcode();
24118 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24119 int mask_len = Matcher::vector_length(this, $mask);
24120 int vlen_enc = vector_length_encoding(this, $mask);
24121 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24122 $tmp$$Register, mask_len, mbt, vlen_enc);
24123 %}
24124 ins_pipe( pipe_slow );
24125 %}
24126
24127 // --------------------------------- Compress/Expand Operations ---------------------------
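// Without AVX512VL (vectors of at most 32 bytes) CompressV/ExpandV are emulated by computing a
// permutation from the mask; with AVX512VL, or for 512-bit vectors, the native EVEX
// compress/expand instructions are used, and CompressM compresses the opmask itself.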
24128 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24129 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24130 match(Set dst (CompressV src mask));
24131 match(Set dst (ExpandV src mask));
24132 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
24133   format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24134 ins_encode %{
24135 int opcode = this->ideal_Opcode();
24136 int vlen_enc = vector_length_encoding(this);
24137 BasicType bt = Matcher::vector_element_basic_type(this);
24138 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24139 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24140 %}
24141 ins_pipe( pipe_slow );
24142 %}
24143
24144 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24145 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24146 match(Set dst (CompressV src mask));
24147 match(Set dst (ExpandV src mask));
24148 format %{ "vector_compress_expand $dst, $src, $mask" %}
24149 ins_encode %{
24150 int opcode = this->ideal_Opcode();
24151 int vector_len = vector_length_encoding(this);
24152 BasicType bt = Matcher::vector_element_basic_type(this);
24153 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24154 %}
24155 ins_pipe( pipe_slow );
24156 %}
24157
24158 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24159 match(Set dst (CompressM mask));
24160 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24161 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24162 ins_encode %{
24163 assert(this->in(1)->bottom_type()->isa_vectmask(), "");
24164 int mask_len = Matcher::vector_length(this);
24165 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24166 %}
24167 ins_pipe( pipe_slow );
24168 %}
24169
24170 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24171
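// ReverseV reverses the bits within each lane.  Without GFNI the reversal is emulated with the
// temporaries below; with GFNI a single affine transform against the 0x8040201008040201 matrix
// reverses the bits of every byte.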
24172 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24173 predicate(!VM_Version::supports_gfni());
24174 match(Set dst (ReverseV src));
24175 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24176   format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24177 ins_encode %{
24178 int vec_enc = vector_length_encoding(this);
24179 BasicType bt = Matcher::vector_element_basic_type(this);
24180 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24181 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24182 %}
24183 ins_pipe( pipe_slow );
24184 %}
24185
24186 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24187 predicate(VM_Version::supports_gfni());
24188 match(Set dst (ReverseV src));
24189 effect(TEMP dst, TEMP xtmp);
24190   format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24191 ins_encode %{
24192 int vec_enc = vector_length_encoding(this);
24193 BasicType bt = Matcher::vector_element_basic_type(this);
24194 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24195 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24196 $xtmp$$XMMRegister);
24197 %}
24198 ins_pipe( pipe_slow );
24199 %}
24200
24201 instruct vreverse_byte_reg(vec dst, vec src) %{
24202 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24203 match(Set dst (ReverseBytesV src));
24204 effect(TEMP dst);
24205 format %{ "vector_reverse_byte $dst, $src" %}
24206 ins_encode %{
24207 int vec_enc = vector_length_encoding(this);
24208 BasicType bt = Matcher::vector_element_basic_type(this);
24209 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24210 %}
24211 ins_pipe( pipe_slow );
24212 %}
24213
24214 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24215 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24216 match(Set dst (ReverseBytesV src));
24217 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24218   format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24219 ins_encode %{
24220 int vec_enc = vector_length_encoding(this);
24221 BasicType bt = Matcher::vector_element_basic_type(this);
24222 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24223 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24224 %}
24225 ins_pipe( pipe_slow );
24226 %}
24227
24228 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24229
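// CountLeadingZerosV: the EVEX forms build on the AVX512CD leading-zero-count instructions
// (subword element types need extra temporaries for widening), while the AVX forms emulate the
// count per lane.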
24230 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24231 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24232 Matcher::vector_length_in_bytes(n->in(1))));
24233 match(Set dst (CountLeadingZerosV src));
24234 format %{ "vector_count_leading_zeros $dst, $src" %}
24235 ins_encode %{
24236 int vlen_enc = vector_length_encoding(this, $src);
24237 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24238 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24239 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24240 %}
24241 ins_pipe( pipe_slow );
24242 %}
24243
24244 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24245 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24246 Matcher::vector_length_in_bytes(n->in(1))));
24247 match(Set dst (CountLeadingZerosV src mask));
24248 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24249 ins_encode %{
24250 int vlen_enc = vector_length_encoding(this, $src);
24251 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24252 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24253 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24254 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24255 %}
24256 ins_pipe( pipe_slow );
24257 %}
24258
24259 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24260 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24261 VM_Version::supports_avx512cd() &&
24262 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24263 match(Set dst (CountLeadingZerosV src));
24264 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24265   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24266 ins_encode %{
24267 int vlen_enc = vector_length_encoding(this, $src);
24268 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24269 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24270 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24271 %}
24272 ins_pipe( pipe_slow );
24273 %}
24274
24275 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24276 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24277 match(Set dst (CountLeadingZerosV src));
24278 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24279   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24280 ins_encode %{
24281 int vlen_enc = vector_length_encoding(this, $src);
24282 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24283 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24284 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24285 $rtmp$$Register, true, vlen_enc);
24286 %}
24287 ins_pipe( pipe_slow );
24288 %}
24289
24290 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24291 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24292 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24293 match(Set dst (CountLeadingZerosV src));
24294 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24295 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24296 ins_encode %{
24297 int vlen_enc = vector_length_encoding(this, $src);
24298 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24299 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24300 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24301 %}
24302 ins_pipe( pipe_slow );
24303 %}
24304
24305 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24306 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24307 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24308 match(Set dst (CountLeadingZerosV src));
24309 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24310 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24311 ins_encode %{
24312 int vlen_enc = vector_length_encoding(this, $src);
24313 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24314 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24315 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24316 %}
24317 ins_pipe( pipe_slow );
24318 %}
24319
24320 // ---------------------------------- Vector Masked Operations ------------------------------------
24321
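// Predicated (masked) lane-wise operations.  evmasked_op emits the EVEX-encoded form of the
// ideal opcode with $mask as the opmask and merge-masking enabled, so lanes whose mask bit is
// clear keep their previous value in $dst.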
24322 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24323 match(Set dst (AddVB (Binary dst src2) mask));
24324 match(Set dst (AddVS (Binary dst src2) mask));
24325 match(Set dst (AddVI (Binary dst src2) mask));
24326 match(Set dst (AddVL (Binary dst src2) mask));
24327 match(Set dst (AddVF (Binary dst src2) mask));
24328 match(Set dst (AddVD (Binary dst src2) mask));
24329 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24330 ins_encode %{
24331 int vlen_enc = vector_length_encoding(this);
24332 BasicType bt = Matcher::vector_element_basic_type(this);
24333 int opc = this->ideal_Opcode();
24334 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24335 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24336 %}
24337 ins_pipe( pipe_slow );
24338 %}
24339
24340 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24341 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24342 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24343 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24344 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24345 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24346 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24347 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24348 ins_encode %{
24349 int vlen_enc = vector_length_encoding(this);
24350 BasicType bt = Matcher::vector_element_basic_type(this);
24351 int opc = this->ideal_Opcode();
24352 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24353 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24354 %}
24355 ins_pipe( pipe_slow );
24356 %}
24357
24358 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24359 match(Set dst (XorV (Binary dst src2) mask));
24360 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24361 ins_encode %{
24362 int vlen_enc = vector_length_encoding(this);
24363 BasicType bt = Matcher::vector_element_basic_type(this);
24364 int opc = this->ideal_Opcode();
24365 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24366 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24367 %}
24368 ins_pipe( pipe_slow );
24369 %}
24370
24371 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24372 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24373 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24374 ins_encode %{
24375 int vlen_enc = vector_length_encoding(this);
24376 BasicType bt = Matcher::vector_element_basic_type(this);
24377 int opc = this->ideal_Opcode();
24378 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24379 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24380 %}
24381 ins_pipe( pipe_slow );
24382 %}
24383
24384 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24385 match(Set dst (OrV (Binary dst src2) mask));
24386 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24387 ins_encode %{
24388 int vlen_enc = vector_length_encoding(this);
24389 BasicType bt = Matcher::vector_element_basic_type(this);
24390 int opc = this->ideal_Opcode();
24391 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24392 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24393 %}
24394 ins_pipe( pipe_slow );
24395 %}
24396
24397 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24398 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24399 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24400 ins_encode %{
24401 int vlen_enc = vector_length_encoding(this);
24402 BasicType bt = Matcher::vector_element_basic_type(this);
24403 int opc = this->ideal_Opcode();
24404 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24405 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24406 %}
24407 ins_pipe( pipe_slow );
24408 %}
24409
24410 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24411 match(Set dst (AndV (Binary dst src2) mask));
24412 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24413 ins_encode %{
24414 int vlen_enc = vector_length_encoding(this);
24415 BasicType bt = Matcher::vector_element_basic_type(this);
24416 int opc = this->ideal_Opcode();
24417 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24418 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24419 %}
24420 ins_pipe( pipe_slow );
24421 %}
24422
24423 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24424 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24425 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24426 ins_encode %{
24427 int vlen_enc = vector_length_encoding(this);
24428 BasicType bt = Matcher::vector_element_basic_type(this);
24429 int opc = this->ideal_Opcode();
24430 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24431 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24432 %}
24433 ins_pipe( pipe_slow );
24434 %}
24435
24436 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24437 match(Set dst (SubVB (Binary dst src2) mask));
24438 match(Set dst (SubVS (Binary dst src2) mask));
24439 match(Set dst (SubVI (Binary dst src2) mask));
24440 match(Set dst (SubVL (Binary dst src2) mask));
24441 match(Set dst (SubVF (Binary dst src2) mask));
24442 match(Set dst (SubVD (Binary dst src2) mask));
24443 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24444 ins_encode %{
24445 int vlen_enc = vector_length_encoding(this);
24446 BasicType bt = Matcher::vector_element_basic_type(this);
24447 int opc = this->ideal_Opcode();
24448 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24449 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24450 %}
24451 ins_pipe( pipe_slow );
24452 %}
24453
24454 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24455 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24456 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24457 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24458 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24459 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24460 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24461 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24462 ins_encode %{
24463 int vlen_enc = vector_length_encoding(this);
24464 BasicType bt = Matcher::vector_element_basic_type(this);
24465 int opc = this->ideal_Opcode();
24466 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24467 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24468 %}
24469 ins_pipe( pipe_slow );
24470 %}
24471
24472 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24473 match(Set dst (MulVS (Binary dst src2) mask));
24474 match(Set dst (MulVI (Binary dst src2) mask));
24475 match(Set dst (MulVL (Binary dst src2) mask));
24476 match(Set dst (MulVF (Binary dst src2) mask));
24477 match(Set dst (MulVD (Binary dst src2) mask));
24478 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24479 ins_encode %{
24480 int vlen_enc = vector_length_encoding(this);
24481 BasicType bt = Matcher::vector_element_basic_type(this);
24482 int opc = this->ideal_Opcode();
24483 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24484 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24485 %}
24486 ins_pipe( pipe_slow );
24487 %}
24488
24489 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24490 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24491 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24492 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24493 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24494 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24495 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24496 ins_encode %{
24497 int vlen_enc = vector_length_encoding(this);
24498 BasicType bt = Matcher::vector_element_basic_type(this);
24499 int opc = this->ideal_Opcode();
24500 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24501 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24502 %}
24503 ins_pipe( pipe_slow );
24504 %}
24505
24506 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24507 match(Set dst (SqrtVF dst mask));
24508 match(Set dst (SqrtVD dst mask));
24509 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24510 ins_encode %{
24511 int vlen_enc = vector_length_encoding(this);
24512 BasicType bt = Matcher::vector_element_basic_type(this);
24513 int opc = this->ideal_Opcode();
24514 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24515 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24516 %}
24517 ins_pipe( pipe_slow );
24518 %}
24519
24520 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24521 match(Set dst (DivVF (Binary dst src2) mask));
24522 match(Set dst (DivVD (Binary dst src2) mask));
24523 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24524 ins_encode %{
24525 int vlen_enc = vector_length_encoding(this);
24526 BasicType bt = Matcher::vector_element_basic_type(this);
24527 int opc = this->ideal_Opcode();
24528 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24529 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24530 %}
24531 ins_pipe( pipe_slow );
24532 %}
24533
24534 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24535 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24536 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24537 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24538 ins_encode %{
24539 int vlen_enc = vector_length_encoding(this);
24540 BasicType bt = Matcher::vector_element_basic_type(this);
24541 int opc = this->ideal_Opcode();
24542 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24543 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24544 %}
24545 ins_pipe( pipe_slow );
24546 %}
24547
24548
24549 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24550 match(Set dst (RotateLeftV (Binary dst shift) mask));
24551 match(Set dst (RotateRightV (Binary dst shift) mask));
24552 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24553 ins_encode %{
24554 int vlen_enc = vector_length_encoding(this);
24555 BasicType bt = Matcher::vector_element_basic_type(this);
24556 int opc = this->ideal_Opcode();
24557 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24558 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24559 %}
24560 ins_pipe( pipe_slow );
24561 %}
24562
24563 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24564 match(Set dst (RotateLeftV (Binary dst src2) mask));
24565 match(Set dst (RotateRightV (Binary dst src2) mask));
24566 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24567 ins_encode %{
24568 int vlen_enc = vector_length_encoding(this);
24569 BasicType bt = Matcher::vector_element_basic_type(this);
24570 int opc = this->ideal_Opcode();
24571 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24572 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24573 %}
24574 ins_pipe( pipe_slow );
24575 %}
24576
24577 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24578 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24579 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24580 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24581 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24582 ins_encode %{
24583 int vlen_enc = vector_length_encoding(this);
24584 BasicType bt = Matcher::vector_element_basic_type(this);
24585 int opc = this->ideal_Opcode();
24586 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24587 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24588 %}
24589 ins_pipe( pipe_slow );
24590 %}
24591
24592 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24593 predicate(!n->as_ShiftV()->is_var_shift());
24594 match(Set dst (LShiftVS (Binary dst src2) mask));
24595 match(Set dst (LShiftVI (Binary dst src2) mask));
24596 match(Set dst (LShiftVL (Binary dst src2) mask));
24597 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24598 ins_encode %{
24599 int vlen_enc = vector_length_encoding(this);
24600 BasicType bt = Matcher::vector_element_basic_type(this);
24601 int opc = this->ideal_Opcode();
24602 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24603 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24604 %}
24605 ins_pipe( pipe_slow );
24606 %}
24607
24608 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24609 predicate(n->as_ShiftV()->is_var_shift());
24610 match(Set dst (LShiftVS (Binary dst src2) mask));
24611 match(Set dst (LShiftVI (Binary dst src2) mask));
24612 match(Set dst (LShiftVL (Binary dst src2) mask));
24613 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24614 ins_encode %{
24615 int vlen_enc = vector_length_encoding(this);
24616 BasicType bt = Matcher::vector_element_basic_type(this);
24617 int opc = this->ideal_Opcode();
24618 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24619 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24620 %}
24621 ins_pipe( pipe_slow );
24622 %}
24623
24624 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24625 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24626 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24627 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24628 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24629 ins_encode %{
24630 int vlen_enc = vector_length_encoding(this);
24631 BasicType bt = Matcher::vector_element_basic_type(this);
24632 int opc = this->ideal_Opcode();
24633 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24634 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24635 %}
24636 ins_pipe( pipe_slow );
24637 %}
24638
24639 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24640 predicate(!n->as_ShiftV()->is_var_shift());
24641 match(Set dst (RShiftVS (Binary dst src2) mask));
24642 match(Set dst (RShiftVI (Binary dst src2) mask));
24643 match(Set dst (RShiftVL (Binary dst src2) mask));
24644 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24645 ins_encode %{
24646 int vlen_enc = vector_length_encoding(this);
24647 BasicType bt = Matcher::vector_element_basic_type(this);
24648 int opc = this->ideal_Opcode();
24649 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24650 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24651 %}
24652 ins_pipe( pipe_slow );
24653 %}
24654
24655 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24656 predicate(n->as_ShiftV()->is_var_shift());
24657 match(Set dst (RShiftVS (Binary dst src2) mask));
24658 match(Set dst (RShiftVI (Binary dst src2) mask));
24659 match(Set dst (RShiftVL (Binary dst src2) mask));
24660 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24661 ins_encode %{
24662 int vlen_enc = vector_length_encoding(this);
24663 BasicType bt = Matcher::vector_element_basic_type(this);
24664 int opc = this->ideal_Opcode();
24665 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24666 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24667 %}
24668 ins_pipe( pipe_slow );
24669 %}
24670
24671 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24672 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24673 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24674 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24675 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24676 ins_encode %{
24677 int vlen_enc = vector_length_encoding(this);
24678 BasicType bt = Matcher::vector_element_basic_type(this);
24679 int opc = this->ideal_Opcode();
24680 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24681 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24682 %}
24683 ins_pipe( pipe_slow );
24684 %}
24685
24686 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24687 predicate(!n->as_ShiftV()->is_var_shift());
24688 match(Set dst (URShiftVS (Binary dst src2) mask));
24689 match(Set dst (URShiftVI (Binary dst src2) mask));
24690 match(Set dst (URShiftVL (Binary dst src2) mask));
24691 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24692 ins_encode %{
24693 int vlen_enc = vector_length_encoding(this);
24694 BasicType bt = Matcher::vector_element_basic_type(this);
24695 int opc = this->ideal_Opcode();
24696 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24697 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24698 %}
24699 ins_pipe( pipe_slow );
24700 %}
24701
24702 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24703 predicate(n->as_ShiftV()->is_var_shift());
24704 match(Set dst (URShiftVS (Binary dst src2) mask));
24705 match(Set dst (URShiftVI (Binary dst src2) mask));
24706 match(Set dst (URShiftVL (Binary dst src2) mask));
24707 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24708 ins_encode %{
24709 int vlen_enc = vector_length_encoding(this);
24710 BasicType bt = Matcher::vector_element_basic_type(this);
24711 int opc = this->ideal_Opcode();
24712 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24713 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24714 %}
24715 ins_pipe( pipe_slow );
24716 %}
24717
24718 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24719 match(Set dst (MaxV (Binary dst src2) mask));
24720 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24721 ins_encode %{
24722 int vlen_enc = vector_length_encoding(this);
24723 BasicType bt = Matcher::vector_element_basic_type(this);
24724 int opc = this->ideal_Opcode();
24725 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24726 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24727 %}
24728 ins_pipe( pipe_slow );
24729 %}
24730
24731 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24732 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24733 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24734 ins_encode %{
24735 int vlen_enc = vector_length_encoding(this);
24736 BasicType bt = Matcher::vector_element_basic_type(this);
24737 int opc = this->ideal_Opcode();
24738 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24739 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24740 %}
24741 ins_pipe( pipe_slow );
24742 %}
24743
24744 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24745 match(Set dst (MinV (Binary dst src2) mask));
24746 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24747 ins_encode %{
24748 int vlen_enc = vector_length_encoding(this);
24749 BasicType bt = Matcher::vector_element_basic_type(this);
24750 int opc = this->ideal_Opcode();
24751 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24752 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24753 %}
24754 ins_pipe( pipe_slow );
24755 %}
24756
24757 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24758 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24759 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24760 ins_encode %{
24761 int vlen_enc = vector_length_encoding(this);
24762 BasicType bt = Matcher::vector_element_basic_type(this);
24763 int opc = this->ideal_Opcode();
24764 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24765 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24766 %}
24767 ins_pipe( pipe_slow );
24768 %}
24769
24770 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24771 match(Set dst (VectorRearrange (Binary dst src2) mask));
24772 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24773 ins_encode %{
24774 int vlen_enc = vector_length_encoding(this);
24775 BasicType bt = Matcher::vector_element_basic_type(this);
24776 int opc = this->ideal_Opcode();
24777 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24778 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24779 %}
24780 ins_pipe( pipe_slow );
24781 %}
24782
24783 instruct vabs_masked(vec dst, kReg mask) %{
24784 match(Set dst (AbsVB dst mask));
24785 match(Set dst (AbsVS dst mask));
24786 match(Set dst (AbsVI dst mask));
24787 match(Set dst (AbsVL dst mask));
24788 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24789 ins_encode %{
24790 int vlen_enc = vector_length_encoding(this);
24791 BasicType bt = Matcher::vector_element_basic_type(this);
24792 int opc = this->ideal_Opcode();
24793 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24794 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24795 %}
24796 ins_pipe( pipe_slow );
24797 %}
24798
24799 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24800 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24801 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24802 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24803 ins_encode %{
24804 assert(UseFMA, "Needs FMA instructions support.");
24805 int vlen_enc = vector_length_encoding(this);
24806 BasicType bt = Matcher::vector_element_basic_type(this);
24807 int opc = this->ideal_Opcode();
24808 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24809 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24810 %}
24811 ins_pipe( pipe_slow );
24812 %}
24813
24814 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24815 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24816 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24817 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24818 ins_encode %{
24819 assert(UseFMA, "Needs FMA instructions support.");
24820 int vlen_enc = vector_length_encoding(this);
24821 BasicType bt = Matcher::vector_element_basic_type(this);
24822 int opc = this->ideal_Opcode();
24823 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24824 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24825 %}
24826 ins_pipe( pipe_slow );
24827 %}
24828
24829 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24830 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24831 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24832 ins_encode %{
24833 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24834 int vlen_enc = vector_length_encoding(this, $src1);
24835 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24836
24837     // Dispatch the masked comparison on the element type of src1.
24838 switch (src1_elem_bt) {
24839 case T_BYTE: {
24840 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24841 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24842 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24843 break;
24844 }
24845 case T_SHORT: {
24846 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24847 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24848 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24849 break;
24850 }
24851 case T_INT: {
24852 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24853 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24854 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24855 break;
24856 }
24857 case T_LONG: {
24858 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24859 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24860 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24861 break;
24862 }
24863 case T_FLOAT: {
24864 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24865 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24866 break;
24867 }
24868 case T_DOUBLE: {
24869 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24870 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24871 break;
24872 }
24873 default: assert(false, "%s", type2name(src1_elem_bt)); break;
24874 }
24875 %}
24876 ins_pipe( pipe_slow );
24877 %}
24878
24879 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24880 predicate(Matcher::vector_length(n) <= 32);
24881 match(Set dst (MaskAll src));
24882 format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24883 ins_encode %{
24884 int mask_len = Matcher::vector_length(this);
24885 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24886 %}
24887 ins_pipe( pipe_slow );
24888 %}
24889
24890 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24891 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24892 match(Set dst (XorVMask src (MaskAll cnt)));
24893 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24894   format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
24895 ins_encode %{
24896 uint masklen = Matcher::vector_length(this);
24897 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24898 %}
24899 ins_pipe( pipe_slow );
24900 %}
24901
24902 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24903 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24904 (Matcher::vector_length(n) == 16) ||
24905 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24906 match(Set dst (XorVMask src (MaskAll cnt)));
24907 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24908 ins_encode %{
24909 uint masklen = Matcher::vector_length(this);
24910 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24911 %}
24912 ins_pipe( pipe_slow );
24913 %}
24914
24915 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24916 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24917 match(Set dst (VectorLongToMask src));
24918 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24919 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24920 ins_encode %{
24921 int mask_len = Matcher::vector_length(this);
24922 int vec_enc = vector_length_encoding(mask_len);
24923 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24924 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24925 %}
24926 ins_pipe( pipe_slow );
24927 %}
24928
24929
24930 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24931 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24932 match(Set dst (VectorLongToMask src));
24933 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
24934   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
24935 ins_encode %{
24936 int mask_len = Matcher::vector_length(this);
24937 assert(mask_len <= 32, "invalid mask length");
24938 int vec_enc = vector_length_encoding(mask_len);
24939 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24940 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24941 %}
24942 ins_pipe( pipe_slow );
24943 %}
24944
24945 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24946 predicate(n->bottom_type()->isa_vectmask());
24947 match(Set dst (VectorLongToMask src));
24948 format %{ "long_to_mask_evex $dst, $src\t!" %}
24949 ins_encode %{
24950 __ kmov($dst$$KRegister, $src$$Register);
24951 %}
24952 ins_pipe( pipe_slow );
24953 %}
24954
24955 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24956 match(Set dst (AndVMask src1 src2));
24957 match(Set dst (OrVMask src1 src2));
24958 match(Set dst (XorVMask src1 src2));
24959 effect(TEMP kscratch);
24960 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24961 ins_encode %{
24962 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24963 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24964 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24965 uint masklen = Matcher::vector_length(this);
24966 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24967 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24968 %}
24969 ins_pipe( pipe_slow );
24970 %}
24971
24972 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24973 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24974 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24975 ins_encode %{
24976 int vlen_enc = vector_length_encoding(this);
24977 BasicType bt = Matcher::vector_element_basic_type(this);
24978 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24979 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24980 %}
24981 ins_pipe( pipe_slow );
24982 %}
24983
24984 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24985 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24986 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24987 ins_encode %{
24988 int vlen_enc = vector_length_encoding(this);
24989 BasicType bt = Matcher::vector_element_basic_type(this);
24990 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24991 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24992 %}
24993 ins_pipe( pipe_slow );
24994 %}
24995
24996 instruct castMM(kReg dst)
24997 %{
24998 match(Set dst (CastVV dst));
24999
25000 size(0);
25001 format %{ "# castVV of $dst" %}
25002 ins_encode(/* empty encoding */);
25003 ins_cost(0);
25004 ins_pipe(empty);
25005 %}
25006
25007 instruct castVV(vec dst)
25008 %{
25009 match(Set dst (CastVV dst));
25010
25011 size(0);
25012 format %{ "# castVV of $dst" %}
25013 ins_encode(/* empty encoding */);
25014 ins_cost(0);
25015 ins_pipe(empty);
25016 %}
25017
25018 instruct castVVLeg(legVec dst)
25019 %{
25020 match(Set dst (CastVV dst));
25021
25022 size(0);
25023 format %{ "# castVV of $dst" %}
25024 ins_encode(/* empty encoding */);
25025 ins_cost(0);
25026 ins_pipe(empty);
25027 %}
25028
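// IsInfinite is implemented with vfpclass using immediate 0x18, which tests the +Inf and -Inf
// classes; the resulting one-bit opmask is then moved into the integer destination.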
25029 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25030 %{
25031 match(Set dst (IsInfiniteF src));
25032 effect(TEMP ktmp, KILL cr);
25033 format %{ "float_class_check $dst, $src" %}
25034 ins_encode %{
25035 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25036 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25037 %}
25038 ins_pipe(pipe_slow);
25039 %}
25040
25041 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25042 %{
25043 match(Set dst (IsInfiniteD src));
25044 effect(TEMP ktmp, KILL cr);
25045 format %{ "double_class_check $dst, $src" %}
25046 ins_encode %{
25047 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25048 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25049 %}
25050 ins_pipe(pipe_slow);
25051 %}
25052
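// Saturating vector add/sub.  Byte and short lanes map directly onto the hardware saturating
// SIMD instructions; int and long lanes have no such instructions, so the _evex/_avx variants
// below detect and clamp the overflow explicitly using the listed temporaries.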
25053 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25054 %{
25055 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25056 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25057 match(Set dst (SaturatingAddV src1 src2));
25058 match(Set dst (SaturatingSubV src1 src2));
25059 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25060 ins_encode %{
25061 int vlen_enc = vector_length_encoding(this);
25062 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25063 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25064 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25065 %}
25066 ins_pipe(pipe_slow);
25067 %}
25068
25069 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25070 %{
25071 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25072 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25073 match(Set dst (SaturatingAddV src1 src2));
25074 match(Set dst (SaturatingSubV src1 src2));
25075 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25076 ins_encode %{
25077 int vlen_enc = vector_length_encoding(this);
25078 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25079 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25080 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25081 %}
25082 ins_pipe(pipe_slow);
25083 %}
25084
25085 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25086 %{
25087 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25088 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25089 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25090 match(Set dst (SaturatingAddV src1 src2));
25091 match(Set dst (SaturatingSubV src1 src2));
25092 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25093 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25094 ins_encode %{
25095 int vlen_enc = vector_length_encoding(this);
25096 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25097 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25098 $src1$$XMMRegister, $src2$$XMMRegister,
25099 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25100 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25101 %}
25102 ins_pipe(pipe_slow);
25103 %}
25104
25105 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25106 %{
25107 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25108 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25109 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25110 match(Set dst (SaturatingAddV src1 src2));
25111 match(Set dst (SaturatingSubV src1 src2));
25112 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25113 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25114 ins_encode %{
25115 int vlen_enc = vector_length_encoding(this);
25116 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25117 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25118 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25119 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25120 %}
25121 ins_pipe(pipe_slow);
25122 %}
25123
25124 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25125 %{
25126 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25127 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25128 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25129 match(Set dst (SaturatingAddV src1 src2));
25130 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25131 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25132 ins_encode %{
25133 int vlen_enc = vector_length_encoding(this);
25134 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25135 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25136 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25137 %}
25138 ins_pipe(pipe_slow);
25139 %}
25140
25141 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25142 %{
25143 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25144 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25145 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25146 match(Set dst (SaturatingAddV src1 src2));
25147 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25148 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25149 ins_encode %{
25150 int vlen_enc = vector_length_encoding(this);
25151 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25152 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25153 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25154 %}
25155 ins_pipe(pipe_slow);
25156 %}
25157
25158 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25159 %{
25160 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25161 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25162 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25163 match(Set dst (SaturatingSubV src1 src2));
25164 effect(TEMP ktmp);
25165 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25166 ins_encode %{
25167 int vlen_enc = vector_length_encoding(this);
25168 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25169 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25170 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25171 %}
25172 ins_pipe(pipe_slow);
25173 %}
25174
25175 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25176 %{
25177 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25178 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25179 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25180 match(Set dst (SaturatingSubV src1 src2));
25181 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25182 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25183 ins_encode %{
25184 int vlen_enc = vector_length_encoding(this);
25185 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25186 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25187 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25188 %}
25189 ins_pipe(pipe_slow);
25190 %}
25191
25192 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25193 %{
25194 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25195 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25196 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25197 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25198 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25199 ins_encode %{
25200 int vlen_enc = vector_length_encoding(this);
25201 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25202 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25203 $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25204 %}
25205 ins_pipe(pipe_slow);
25206 %}
25207
25208 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25209 %{
25210 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25211 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25212 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25213 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25214 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25215 ins_encode %{
25216 int vlen_enc = vector_length_encoding(this);
25217 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25218 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25219 $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25220 %}
25221 ins_pipe(pipe_slow);
25222 %}
25223
25224 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25225 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25226 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25227 match(Set dst (SaturatingAddV (Binary dst src) mask));
25228 match(Set dst (SaturatingSubV (Binary dst src) mask));
25229 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25230 ins_encode %{
25231 int vlen_enc = vector_length_encoding(this);
25232 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25233 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25234 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25235 %}
25236 ins_pipe( pipe_slow );
25237 %}
25238
25239 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25240 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25241 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25242 match(Set dst (SaturatingAddV (Binary dst src) mask));
25243 match(Set dst (SaturatingSubV (Binary dst src) mask));
25244 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25245 ins_encode %{
25246 int vlen_enc = vector_length_encoding(this);
25247 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25248 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25249 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25250 %}
25251 ins_pipe( pipe_slow );
25252 %}
25253
25254 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25255 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25256 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25257 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25258 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25259 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25260 ins_encode %{
25261 int vlen_enc = vector_length_encoding(this);
25262 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25263 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25264 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25265 %}
25266 ins_pipe( pipe_slow );
25267 %}
25268
25269 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25270 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25271 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25272 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25273 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25274 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25275 ins_encode %{
25276 int vlen_enc = vector_length_encoding(this);
25277 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25278 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25279 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25280 %}
25281 ins_pipe( pipe_slow );
25282 %}
25283
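// SelectFromTwoVector picks each destination lane from one of the two source
// vectors according to a per-lane index. On AVX-512 this maps naturally onto
// the two-table permute instructions (vpermi2*/vpermt2*), which is what the
// macro assembler routine used below is expected to emit.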
25284 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25285 %{
25286 match(Set index (SelectFromTwoVector (Binary index src1) src2));
25287 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25288 ins_encode %{
25289 int vlen_enc = vector_length_encoding(this);
25290 BasicType bt = Matcher::vector_element_basic_type(this);
25291 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25292 %}
25293 ins_pipe(pipe_slow);
25294 %}
25295
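// Scalar Float16 support. A half-precision value is carried in the low 16 bits
// of either a general-purpose register (as a Java short) or an XMM register;
// vmovw transfers exactly those 16 bits between the two register files.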
25296 instruct reinterpretS2HF(regF dst, rRegI src)
25297 %{
25298 match(Set dst (ReinterpretS2HF src));
25299 format %{ "vmovw $dst, $src" %}
25300 ins_encode %{
25301 __ vmovw($dst$$XMMRegister, $src$$Register);
25302 %}
25303 ins_pipe(pipe_slow);
25304 %}
25305
25306 instruct reinterpretHF2S(rRegI dst, regF src)
25307 %{
25308 match(Set dst (ReinterpretHF2S src));
25309 format %{ "vmovw $dst, $src" %}
25310 ins_encode %{
25311 __ vmovw($dst$$Register, $src$$XMMRegister);
25312 %}
25313 ins_pipe(pipe_slow);
25314 %}
25315
25316 instruct convF2HFAndS2HF(regF dst, regF src)
25317 %{
25318 match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25319 format %{ "convF2HFAndS2HF $dst, $src" %}
25320 ins_encode %{
25321 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25322 %}
25323 ins_pipe(pipe_slow);
25324 %}
25325
25326 instruct convHF2SAndHF2F(regF dst, regF src)
25327 %{
25328 match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25329 format %{ "convHF2SAndHF2F $dst, $src" %}
25330 ins_encode %{
25331 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25332 %}
25333 ins_pipe(pipe_slow);
25334 %}
25335
25336 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25337 %{
25338 match(Set dst (SqrtHF src));
25339 format %{ "scalar_sqrt_fp16 $dst, $src" %}
25340 ins_encode %{
25341 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25342 %}
25343 ins_pipe(pipe_slow);
25344 %}
25345
25346 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25347 %{
25348 match(Set dst (AddHF src1 src2));
25349 match(Set dst (DivHF src1 src2));
25350 match(Set dst (MulHF src1 src2));
25351 match(Set dst (SubHF src1 src2));
25352 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25353 ins_encode %{
25354 int opcode = this->ideal_Opcode();
25355 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25356 %}
25357 ins_pipe(pipe_slow);
25358 %}
25359
25360 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25361 %{
25362 predicate(VM_Version::supports_avx10_2());
25363 match(Set dst (MaxHF src1 src2));
25364 match(Set dst (MinHF src1 src2));
25365 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25366 ins_encode %{
25367 int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25368 __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25369 %}
25370 ins_pipe( pipe_slow );
25371 %}
25372
25373 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25374 %{
25375 predicate(!VM_Version::supports_avx10_2());
25376 match(Set dst (MaxHF src1 src2));
25377 match(Set dst (MinHF src1 src2));
25378 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25379 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25380 ins_encode %{
25381 int opcode = this->ideal_Opcode();
25382 __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25383 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25384 %}
25385 ins_pipe( pipe_slow );
25386 %}
25387
25388 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25389 %{
25390 match(Set dst (FmaHF src2 (Binary dst src1)));
25391 effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2, scalar fp16 fma" %}
25393 ins_encode %{
25394 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25395 %}
25396 ins_pipe( pipe_slow );
25397 %}
25398
25399
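// Vector Float16 operations. The *_mem variants below fold a
// (VectorReinterpret (LoadVector ...)) subtree into the instruction's memory
// operand, so the short-vector load feeding the reinterpret does not need to
// be materialized separately.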
25400 instruct vector_sqrt_HF_reg(vec dst, vec src)
25401 %{
25402 match(Set dst (SqrtVHF src));
25403 format %{ "vector_sqrt_fp16 $dst, $src" %}
25404 ins_encode %{
25405 int vlen_enc = vector_length_encoding(this);
25406 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25407 %}
25408 ins_pipe(pipe_slow);
25409 %}
25410
25411 instruct vector_sqrt_HF_mem(vec dst, memory src)
25412 %{
25413 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25414 format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25415 ins_encode %{
25416 int vlen_enc = vector_length_encoding(this);
25417 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25418 %}
25419 ins_pipe(pipe_slow);
25420 %}
25421
25422 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25423 %{
25424 match(Set dst (AddVHF src1 src2));
25425 match(Set dst (DivVHF src1 src2));
25426 match(Set dst (MulVHF src1 src2));
25427 match(Set dst (SubVHF src1 src2));
25428 format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25429 ins_encode %{
25430 int vlen_enc = vector_length_encoding(this);
25431 int opcode = this->ideal_Opcode();
25432 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25433 %}
25434 ins_pipe(pipe_slow);
25435 %}
25436
25437
25438 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25439 %{
25440 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25441 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25442 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25443 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25444 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25445 ins_encode %{
25446 int vlen_enc = vector_length_encoding(this);
25447 int opcode = this->ideal_Opcode();
25448 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25449 %}
25450 ins_pipe(pipe_slow);
25451 %}
25452
25453 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25454 %{
25455 match(Set dst (FmaVHF src2 (Binary dst src1)));
25456 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25457 ins_encode %{
25458 int vlen_enc = vector_length_encoding(this);
25459 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25460 %}
25461 ins_pipe( pipe_slow );
25462 %}
25463
25464 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25465 %{
25466 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25467 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25468 ins_encode %{
25469 int vlen_enc = vector_length_encoding(this);
25470 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25471 %}
25472 ins_pipe( pipe_slow );
25473 %}
25474
25475 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25476 %{
25477 predicate(VM_Version::supports_avx10_2());
25478 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25479 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25480 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25481 ins_encode %{
25482 int vlen_enc = vector_length_encoding(this);
25483 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25484 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25485 %}
25486 ins_pipe( pipe_slow );
25487 %}
25488
25489 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25490 %{
25491 predicate(VM_Version::supports_avx10_2());
25492 match(Set dst (MinVHF src1 src2));
25493 match(Set dst (MaxVHF src1 src2));
25494 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25495 ins_encode %{
25496 int vlen_enc = vector_length_encoding(this);
25497 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25498 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25499 %}
25500 ins_pipe( pipe_slow );
25501 %}
25502
25503 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25504 %{
25505 predicate(!VM_Version::supports_avx10_2());
25506 match(Set dst (MinVHF src1 src2));
25507 match(Set dst (MaxVHF src1 src2));
25508 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25509 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25510 ins_encode %{
25511 int vlen_enc = vector_length_encoding(this);
25512 int opcode = this->ideal_Opcode();
25513 __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25514 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25515 %}
25516 ins_pipe( pipe_slow );
25517 %}
25518
25519 //----------PEEPHOLE RULES-----------------------------------------------------
25520 // These must follow all instruction definitions as they use the names
25521 // defined in the instructions definitions.
25522 //
25523 // peeppredicate ( rule_predicate );
// // the predicate; unless it holds, the peephole rule is ignored
25525 //
25526 // peepmatch ( root_instr_name [preceding_instruction]* );
25527 //
25528 // peepprocedure ( procedure_name );
// // provide the name of a procedure that performs the optimization; the
// // procedure should reside in the architecture-dependent peephole file and
// // has the signature MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...),
// // with the arguments being the basic block, the current node index inside
// // the block, the register allocator, a function which, when invoked, returns
// // a new node as defined in peepreplace, and the rules of the nodes appearing
// // in the corresponding peepmatch. The procedure returns true if successful,
// // otherwise false.
25537 //
25538 // peepconstraint %{
25539 // (instruction_number.operand_name relational_op instruction_number.operand_name
25540 // [, ...] );
25541 // // instruction numbers are zero-based using left to right order in peepmatch
25542 //
25543 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25544 // // provide an instruction_number.operand_name for each operand that appears
25545 // // in the replacement instruction's match rule
25546 //
25547 // ---------VM FLAGS---------------------------------------------------------
25548 //
25549 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25550 //
25551 // Each peephole rule is given an identifying number starting with zero and
25552 // increasing by one in the order seen by the parser. An individual peephole
25553 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25554 // on the command-line.
25555 //
25556 // ---------CURRENT LIMITATIONS----------------------------------------------
25557 //
// Only transformations inside a basic block (do we need more for peephole?)
25559 //
25560 // ---------EXAMPLE----------------------------------------------------------
25561 //
25562 // // pertinent parts of existing instructions in architecture description
25563 // instruct movI(rRegI dst, rRegI src)
25564 // %{
25565 // match(Set dst (CopyI src));
25566 // %}
25567 //
25568 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25569 // %{
25570 // match(Set dst (AddI dst src));
25571 // effect(KILL cr);
25572 // %}
25573 //
25574 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25575 // %{
25576 // match(Set dst (AddI dst src));
25577 // %}
25578 //
25579 // 1. Simple replacement
25580 // - Only match adjacent instructions in same basic block
25581 // - Only equality constraints
25582 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25583 // - Only one replacement instruction
25584 //
25585 // // Change (inc mov) to lea
25586 // peephole %{
25587 // // lea should only be emitted when beneficial
25588 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25589 // // increment preceded by register-register move
25590 // peepmatch ( incI_rReg movI );
25591 // // require that the destination register of the increment
25592 // // match the destination register of the move
25593 // peepconstraint ( 0.dst == 1.dst );
25594 // // construct a replacement instruction that sets
25595 // // the destination to ( move's source register + one )
25596 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25597 // %}
25598 //
25599 // 2. Procedural replacement
// - More flexible identification of relevant nodes
25601 // - More flexible constraints
25602 // - More flexible transformations
25603 // - May utilise architecture-dependent API more effectively
25604 // - Currently only one replacement instruction due to adlc parsing capabilities
25605 //
25606 // // Change (inc mov) to lea
25607 // peephole %{
25608 // // lea should only be emitted when beneficial
25609 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25610 // // the rule numbers of these nodes inside are passed into the function below
25611 // peepmatch ( incI_rReg movI );
25612 // // the method that takes the responsibility of transformation
25613 // peepprocedure ( inc_mov_to_lea );
// // the replacement is a leaI_rReg_immI; a lambda which, when invoked, creates
// // this node is passed into the function above
25616 // peepreplace ( leaI_rReg_immI() );
25617 // %}
25618
// These instructions are not matched by the matcher but are used by the peephole optimizer
25620 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25621 %{
25622 predicate(false);
25623 match(Set dst (AddI src1 src2));
25624 format %{ "leal $dst, [$src1 + $src2]" %}
25625 ins_encode %{
25626 Register dst = $dst$$Register;
25627 Register src1 = $src1$$Register;
25628 Register src2 = $src2$$Register;
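    // rbp and r13 cannot be encoded as a base register without a displacement
    // byte, so when src1 is one of them use src2 as the base instead.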
25629 if (src1 != rbp && src1 != r13) {
25630 __ leal(dst, Address(src1, src2, Address::times_1));
25631 } else {
25632 assert(src2 != rbp && src2 != r13, "");
25633 __ leal(dst, Address(src2, src1, Address::times_1));
25634 }
25635 %}
25636 ins_pipe(ialu_reg_reg);
25637 %}
25638
25639 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25640 %{
25641 predicate(false);
25642 match(Set dst (AddI src1 src2));
25643 format %{ "leal $dst, [$src1 + $src2]" %}
25644 ins_encode %{
25645 __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25646 %}
25647 ins_pipe(ialu_reg_reg);
25648 %}
25649
25650 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25651 %{
25652 predicate(false);
25653 match(Set dst (LShiftI src shift));
25654 format %{ "leal $dst, [$src << $shift]" %}
25655 ins_encode %{
25656 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25657 Register src = $src$$Register;
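    // A shift by 1 (times_2) can be encoded as [src + src], which needs no
    // displacement, whereas the base-less form Address(noreg, src, scale)
    // always requires a 32-bit displacement. rbp and r13 cannot serve as a
    // base without a displacement, so they take the general form instead.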
25658 if (scale == Address::times_2 && src != rbp && src != r13) {
25659 __ leal($dst$$Register, Address(src, src, Address::times_1));
25660 } else {
25661 __ leal($dst$$Register, Address(noreg, src, scale));
25662 }
25663 %}
25664 ins_pipe(ialu_reg_reg);
25665 %}
25666
25667 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25668 %{
25669 predicate(false);
25670 match(Set dst (AddL src1 src2));
25671 format %{ "leaq $dst, [$src1 + $src2]" %}
25672 ins_encode %{
25673 Register dst = $dst$$Register;
25674 Register src1 = $src1$$Register;
25675 Register src2 = $src2$$Register;
25676 if (src1 != rbp && src1 != r13) {
25677 __ leaq(dst, Address(src1, src2, Address::times_1));
25678 } else {
25679 assert(src2 != rbp && src2 != r13, "");
25680 __ leaq(dst, Address(src2, src1, Address::times_1));
25681 }
25682 %}
25683 ins_pipe(ialu_reg_reg);
25684 %}
25685
25686 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25687 %{
25688 predicate(false);
25689 match(Set dst (AddL src1 src2));
25690 format %{ "leaq $dst, [$src1 + $src2]" %}
25691 ins_encode %{
25692 __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25693 %}
25694 ins_pipe(ialu_reg_reg);
25695 %}
25696
25697 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25698 %{
25699 predicate(false);
25700 match(Set dst (LShiftL src shift));
25701 format %{ "leaq $dst, [$src << $shift]" %}
25702 ins_encode %{
25703 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25704 Register src = $src$$Register;
25705 if (scale == Address::times_2 && src != rbp && src != r13) {
25706 __ leaq($dst$$Register, Address(src, src, Address::times_1));
25707 } else {
25708 __ leaq($dst$$Register, Address(noreg, src, scale));
25709 }
25710 %}
25711 ins_pipe(ialu_reg_reg);
25712 %}
25713
// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (supports_fast_2op_lea()), whereas the {inc, dec} rules are generally only
// beneficial on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.
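//
// For illustration (register choices are arbitrary), a pair such as
//   movl R10, RBX
//   addl R10, RCX
// may be coalesced by the procedures below into the single instruction
//   leal R10, [RBX + RCX]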
25720
25721 peephole
25722 %{
25723 peeppredicate(VM_Version::supports_fast_2op_lea());
25724 peepmatch (addI_rReg);
25725 peepprocedure (lea_coalesce_reg);
25726 peepreplace (leaI_rReg_rReg_peep());
25727 %}
25728
25729 peephole
25730 %{
25731 peeppredicate(VM_Version::supports_fast_2op_lea());
25732 peepmatch (addI_rReg_imm);
25733 peepprocedure (lea_coalesce_imm);
25734 peepreplace (leaI_rReg_immI_peep());
25735 %}
25736
25737 peephole
25738 %{
25739 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25740 VM_Version::is_intel_cascade_lake());
25741 peepmatch (incI_rReg);
25742 peepprocedure (lea_coalesce_imm);
25743 peepreplace (leaI_rReg_immI_peep());
25744 %}
25745
25746 peephole
25747 %{
25748 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25749 VM_Version::is_intel_cascade_lake());
25750 peepmatch (decI_rReg);
25751 peepprocedure (lea_coalesce_imm);
25752 peepreplace (leaI_rReg_immI_peep());
25753 %}
25754
25755 peephole
25756 %{
25757 peeppredicate(VM_Version::supports_fast_2op_lea());
25758 peepmatch (salI_rReg_immI2);
25759 peepprocedure (lea_coalesce_imm);
25760 peepreplace (leaI_rReg_immI2_peep());
25761 %}
25762
25763 peephole
25764 %{
25765 peeppredicate(VM_Version::supports_fast_2op_lea());
25766 peepmatch (addL_rReg);
25767 peepprocedure (lea_coalesce_reg);
25768 peepreplace (leaL_rReg_rReg_peep());
25769 %}
25770
25771 peephole
25772 %{
25773 peeppredicate(VM_Version::supports_fast_2op_lea());
25774 peepmatch (addL_rReg_imm);
25775 peepprocedure (lea_coalesce_imm);
25776 peepreplace (leaL_rReg_immL32_peep());
25777 %}
25778
25779 peephole
25780 %{
25781 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25782 VM_Version::is_intel_cascade_lake());
25783 peepmatch (incL_rReg);
25784 peepprocedure (lea_coalesce_imm);
25785 peepreplace (leaL_rReg_immL32_peep());
25786 %}
25787
25788 peephole
25789 %{
25790 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25791 VM_Version::is_intel_cascade_lake());
25792 peepmatch (decL_rReg);
25793 peepprocedure (lea_coalesce_imm);
25794 peepreplace (leaL_rReg_immL32_peep());
25795 %}
25796
25797 peephole
25798 %{
25799 peeppredicate(VM_Version::supports_fast_2op_lea());
25800 peepmatch (salL_rReg_immI2);
25801 peepprocedure (lea_coalesce_imm);
25802 peepreplace (leaL_rReg_immI2_peep());
25803 %}
25804
25805 peephole
25806 %{
25807 peepmatch (leaPCompressedOopOffset);
25808 peepprocedure (lea_remove_redundant);
25809 %}
25810
25811 peephole
25812 %{
25813 peepmatch (leaP8Narrow);
25814 peepprocedure (lea_remove_redundant);
25815 %}
25816
25817 peephole
25818 %{
25819 peepmatch (leaP32Narrow);
25820 peepprocedure (lea_remove_redundant);
25821 %}
25822
// These peephole rules match instructions which set flags and are followed by a testI_reg/testL_reg.
// The test instruction is redundant if the downstream instructions (like JCC or CMOV) only use flags
// that are already set by the previous instruction.
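//
// For illustration, in a sequence such as
//   andl  RAX, RBX   // already sets ZF/SF from the result
//   testl RAX, RAX
//   je    SomeLabel
// the testl is redundant: the branch only consumes flags that the preceding
// andl has already produced, so test_may_remove can safely drop it.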
25825
25826 //int variant
25827 peephole
25828 %{
25829 peepmatch (testI_reg);
25830 peepprocedure (test_may_remove);
25831 %}
25832
25833 //long variant
25834 peephole
25835 %{
25836 peepmatch (testL_reg);
25837 peepprocedure (test_may_remove);
25838 %}
25839
25840
25841 //----------SMARTSPILL RULES---------------------------------------------------
25842 // These must follow all instruction definitions as they use the names
25843 // defined in the instructions definitions.